From 0b96eeae13886d11c23a2573fc01b9a37bfe1740 Mon Sep 17 00:00:00 2001
From: chenfeiyu <chenfeiyu@baidu.com>
Date: Thu, 19 Mar 2020 03:26:46 +0000
Subject: [PATCH 01/11] fix integer data type for deepvoice3's data loader

---
 examples/deepvoice3/data.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/examples/deepvoice3/data.py b/examples/deepvoice3/data.py
index 0d0aaeb..4638174 100644
--- a/examples/deepvoice3/data.py
+++ b/examples/deepvoice3/data.py
@@ -189,11 +189,14 @@ class DataCollector(object):
         # text positions
         text_mask = (np.arange(1, 1 + max_text_length) <= np.expand_dims(
             text_lengths, -1)).astype(np.int64)
-        text_positions = np.arange(1, 1 + max_text_length) * text_mask
+        text_positions = np.arange(
+            1, 1 + max_text_length, dtype=np.int64) * text_mask
 
         # decoder_positions
         decoder_positions = np.tile(
-            np.expand_dims(np.arange(1, 1 + max_decoder_length), 0),
+            np.expand_dims(
+                np.arange(
+                    1, 1 + max_decoder_length, dtype=np.int64), 0),
             (batch_size, 1))
 
         return (text_sequences, text_lengths, text_positions, mel_specs,

From d77557b1a2c8166630d3edbf364f69f23266b346 Mon Sep 17 00:00:00 2001
From: chenfeiyu <chenfeiyu@baidu.com>
Date: Sat, 21 Mar 2020 11:39:54 +0000
Subject: [PATCH 02/11] fix for examples/wavenet: remove weight norm after
 loading model

---
 examples/wavenet/synthesis.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/examples/wavenet/synthesis.py b/examples/wavenet/synthesis.py
index 9b5487b..346370f 100644
--- a/examples/wavenet/synthesis.py
+++ b/examples/wavenet/synthesis.py
@@ -21,6 +21,7 @@ from tensorboardX import SummaryWriter
 from paddle import fluid
 import paddle.fluid.dygraph as dg
 
+from parakeet.modules.weight_norm import WeightNormWrapper
 from parakeet.data import SliceDataset, TransformDataset, DataCargo, SequentialSampler, RandomSampler
 from parakeet.models.wavenet import UpsampleNet, WaveNet, ConditionalWavenet
 from parakeet.utils.layer_tools import summary
@@ -114,6 +115,10 @@ if __name__ == "__main__":
         print("Loading from {}.pdparams".format(args.checkpoint))
         model.set_dict(model_dict)
 
+        for layer in model.sublayers():
+            if isinstance(layer, WeightNormWrapper):
+                layer.remove_weight_norm()
+
         train_loader = fluid.io.DataLoader.from_generator(
             capacity=10, return_list=True)
         train_loader.set_batch_generator(train_cargo, place)

From 67613951d59b16ced5e524f7ad2044c973af2796 Mon Sep 17 00:00:00 2001
From: chenfeiyu <chenfeiyu@baidu.com>
Date: Sat, 21 Mar 2020 11:52:15 +0000
Subject: [PATCH 03/11] minor fixes for wavenet and modules

---
 parakeet/models/wavenet/wavenet.py |  1 +
 parakeet/modules/customized.py     |  6 +++---
 parakeet/modules/weight_norm.py    | 10 +++++-----
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/parakeet/models/wavenet/wavenet.py b/parakeet/models/wavenet/wavenet.py
index b4c0d49..49778a5 100644
--- a/parakeet/models/wavenet/wavenet.py
+++ b/parakeet/models/wavenet/wavenet.py
@@ -313,6 +313,7 @@ class WaveNet(dg.Layer):
         """
         # Causal Conv
         if self.loss_type == "softmax":
+            x = F.clip(x, min=-1., max=0.99999)
             x = quantize(x, self.output_dim)
             x = self.embed(x)  # (B, T, C), T=1
         else:
diff --git a/parakeet/modules/customized.py b/parakeet/modules/customized.py
index 3b9a89b..2bb4574 100644
--- a/parakeet/modules/customized.py
+++ b/parakeet/modules/customized.py
@@ -86,7 +86,7 @@ class Conv1D(dg.Conv2D):
                  stride=1,
                  padding=0,
                  dilation=1,
-                 groups=None,
+                 groups=1,
                  param_attr=None,
                  bias_attr=None,
                  use_cudnn=True,
@@ -128,7 +128,7 @@ class Conv1DTranspose(dg.Conv2DTranspose):
                  padding=0,
                  stride=1,
                  dilation=1,
-                 groups=None,
+                 groups=1,
                  param_attr=None,
                  bias_attr=None,
                  use_cudnn=True,
@@ -179,7 +179,7 @@ class Conv1DCell(Conv1D):
                  filter_size,
                  dilation=1,
                  causal=False,
-                 groups=None,
+                 groups=1,
                  param_attr=None,
                  bias_attr=None,
                  use_cudnn=True,
diff --git a/parakeet/modules/weight_norm.py b/parakeet/modules/weight_norm.py
index 92f1085..1f05ffb 100644
--- a/parakeet/modules/weight_norm.py
+++ b/parakeet/modules/weight_norm.py
@@ -160,7 +160,7 @@ def Conv1D(num_channels,
            stride=1,
            padding=0,
            dilation=1,
-           groups=None,
+           groups=1,
            param_attr=None,
            bias_attr=None,
            use_cudnn=True,
@@ -179,7 +179,7 @@ def Conv1DTranspose(num_channels,
                     padding=0,
                     stride=1,
                     dilation=1,
-                    groups=None,
+                    groups=1,
                     param_attr=None,
                     bias_attr=None,
                     use_cudnn=True,
@@ -197,7 +197,7 @@ def Conv1DCell(num_channels,
                filter_size,
                dilation=1,
                causal=False,
-               groups=None,
+               groups=1,
                param_attr=None,
                bias_attr=None,
                use_cudnn=True,
@@ -216,7 +216,7 @@ def Conv2D(num_channels,
            stride=1,
            padding=0,
            dilation=1,
-           groups=None,
+           groups=1,
            param_attr=None,
            bias_attr=None,
            use_cudnn=True,
@@ -237,7 +237,7 @@ def Conv2DTranspose(num_channels,
                     padding=0,
                     stride=1,
                     dilation=1,
-                    groups=None,
+                    groups=1,
                     param_attr=None,
                     bias_attr=None,
                     use_cudnn=True,

From 2a1819a19c2ca7b536f8e7121e5abb474c0b760a Mon Sep 17 00:00:00 2001
From: chenfeiyu <chenfeiyu@baidu.com>
Date: Sat, 21 Mar 2020 15:08:51 +0000
Subject: [PATCH 04/11] add warning in Conv1DCell and synthesis.py for wavenet
 and deepvoice 3(auto-regressive models)

---
 examples/deepvoice3/synthesis.py | 2 ++
 examples/wavenet/synthesis.py    | 2 ++
 parakeet/modules/customized.py   | 6 ++++++
 3 files changed, 10 insertions(+)

diff --git a/examples/deepvoice3/synthesis.py b/examples/deepvoice3/synthesis.py
index 6d79d46..0631dae 100644
--- a/examples/deepvoice3/synthesis.py
+++ b/examples/deepvoice3/synthesis.py
@@ -101,6 +101,8 @@ if __name__ == "__main__":
         state, _ = dg.load_dygraph(args.checkpoint)
         dv3.set_dict(state)
 
+        # WARNING: don't forget to remove weight norm to re-compute each wrapped layer's weight
+        # removing weight norm also speeds up computation
         for layer in dv3.sublayers():
             if isinstance(layer, WeightNormWrapper):
                 layer.remove_weight_norm()
diff --git a/examples/wavenet/synthesis.py b/examples/wavenet/synthesis.py
index 346370f..f3d4c93 100644
--- a/examples/wavenet/synthesis.py
+++ b/examples/wavenet/synthesis.py
@@ -115,6 +115,8 @@ if __name__ == "__main__":
         print("Loading from {}.pdparams".format(args.checkpoint))
         model.set_dict(model_dict)
 
+        # WARNING: don't forget to remove weight norm to re-compute each wrapped layer's weight
+        # removing weight norm also speeds up computation
         for layer in model.sublayers():
             if isinstance(layer, WeightNormWrapper):
                 layer.remove_weight_norm()
diff --git a/parakeet/modules/customized.py b/parakeet/modules/customized.py
index 2bb4574..84ca68c 100644
--- a/parakeet/modules/customized.py
+++ b/parakeet/modules/customized.py
@@ -225,6 +225,12 @@ class Conv1DCell(Conv1D):
 
     def start_sequence(self):
         """Prepare the Conv1DCell to generate a new sequence, this method should be called before calling add_input multiple times.
+
+        WARNING:
+            This method accesses `self.weight` directly. If a `Conv1DCell` object is wrapped in a `WeightNormWrapper`, make sure this method is called only after the `WeightNormWrapper`'s hook is called.
+            `WeightNormWrapper` removes the wrapped layer's `weight` and adds `weight_v` and `weight_g`, from which the wrapped layer's weight is re-computed as $weight = weight_g * weight_v / ||weight_v||$. (Recomputing the `weight` is a hook that runs before the wrapped layer's `forward` method is called.)
+            Whenever a `WeightNormWrapper`'s `forward` method is called, the wrapped layer's weight is updated. But when loading from a checkpoint, `weight_v` and `weight_g` are updated but the wrapped layer's weight is not, since it is no longer a `Parameter`. You should manually call `remove_weight_norm` or `hook` to re-compute the wrapped layer's weight before calling this method if you don't call `forward` first.
+            So when loading a model which uses `Conv1DCell` objects wrapped in `WeightNormWrapper`s, remember to call `remove_weight_norm` for all `WeightNormWrapper`s before synthesizing. Also, removing weight norm speeds up computation.
         """
         if not self.causal:
             raise ValueError(

From 64790853e5688a262a0e2a144db0c6ae5335dccc Mon Sep 17 00:00:00 2001
From: liuyibing01 <liuyibing01@baidu.com>
Date: Sun, 22 Mar 2020 08:05:05 +0000
Subject: [PATCH 05/11] Unify save & load interfaces

---
 examples/waveflow/benchmark.py       |   3 +-
 examples/waveflow/synthesis.py       |   5 +-
 examples/waveflow/train.py           |   5 +-
 examples/waveflow/utils.py           | 135 -----------------------
 parakeet/models/waveflow/waveflow.py |  12 +-
 parakeet/utils/io.py                 | 158 +++++++++++++++++++++++++++
 6 files changed, 173 insertions(+), 145 deletions(-)
 create mode 100644 parakeet/utils/io.py

diff --git a/examples/waveflow/benchmark.py b/examples/waveflow/benchmark.py
index 3badeda..0581471 100644
--- a/examples/waveflow/benchmark.py
+++ b/examples/waveflow/benchmark.py
@@ -22,6 +22,7 @@ import paddle.fluid.dygraph as dg
 from paddle import fluid
 
 import utils
+from parakeet.utils import io
 from parakeet.models.waveflow import WaveFlow
 
 
@@ -98,5 +99,5 @@ if __name__ == "__main__":
     # For conflicting updates to the same field,
     # the preceding update will be overwritten by the following one.
     config = parser.parse_args()
-    config = utils.add_yaml_config(config)
+    config = io.add_yaml_config_to_args(config)
     benchmark(config)
diff --git a/examples/waveflow/synthesis.py b/examples/waveflow/synthesis.py
index 0647e94..5f3dd5a 100644
--- a/examples/waveflow/synthesis.py
+++ b/examples/waveflow/synthesis.py
@@ -23,6 +23,7 @@ from paddle import fluid
 
 import utils
 from parakeet.models.waveflow import WaveFlow
+from parakeet.utils import io
 
 
 def add_options_to_parser(parser):
@@ -96,7 +97,7 @@ def synthesize(config):
         # Obtain the current iteration.
         if config.checkpoint is None:
             if config.iteration is None:
-                iteration = utils.load_latest_checkpoint(checkpoint_dir)
+                iteration = io.load_latest_checkpoint(checkpoint_dir)
             else:
                 iteration = config.iteration
         else:
@@ -117,5 +118,5 @@ if __name__ == "__main__":
     # For conflicting updates to the same field,
     # the preceding update will be overwritten by the following one.
     config = parser.parse_args()
-    config = utils.add_yaml_config(config)
+    config = io.add_yaml_config_to_args(config)
     synthesize(config)
diff --git a/examples/waveflow/train.py b/examples/waveflow/train.py
index 32059c8..548c5da 100644
--- a/examples/waveflow/train.py
+++ b/examples/waveflow/train.py
@@ -25,6 +25,7 @@ from paddle import fluid
 from tensorboardX import SummaryWriter
 
 import utils
+from parakeet.utils import io
 from parakeet.models.waveflow import WaveFlow
 
 
@@ -104,7 +105,7 @@ def train(config):
         # Obtain the current iteration.
         if config.checkpoint is None:
             if config.iteration is None:
-                iteration = utils.load_latest_checkpoint(checkpoint_dir, rank)
+                iteration = io.load_latest_checkpoint(checkpoint_dir, rank)
             else:
                 iteration = config.iteration
         else:
@@ -140,7 +141,7 @@ if __name__ == "__main__":
     # For conflicting updates to the same field, 
     # the preceding update will be overwritten by the following one.
     config = parser.parse_args()
-    config = utils.add_yaml_config(config)
+    config = io.add_yaml_config_to_args(config)
     # Force to use fp32 in model training
     vars(config)["use_fp16"] = False
     train(config)
diff --git a/examples/waveflow/utils.py b/examples/waveflow/utils.py
index b899073..3f934de 100644
--- a/examples/waveflow/utils.py
+++ b/examples/waveflow/utils.py
@@ -12,14 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import itertools
-import os
-import time
-
 import argparse
-import ruamel.yaml
-import numpy as np
-import paddle.fluid.dygraph as dg
 
 
 def str2bool(v):
@@ -95,131 +88,3 @@ def add_config_options_to_parser(parser):
         '--kernel_w', type=int, help="width of the kernel in the conv2d layer")
 
     parser.add_argument('--config', type=str, help="Path to the config file.")
-
-
-def add_yaml_config(config):
-    with open(config.config, 'rt') as f:
-        yaml_cfg = ruamel.yaml.safe_load(f)
-    cfg_vars = vars(config)
-    for k, v in yaml_cfg.items():
-        if k in cfg_vars and cfg_vars[k] is not None:
-            continue
-        cfg_vars[k] = v
-    return config
-
-
-def load_latest_checkpoint(checkpoint_dir, rank=0):
-    """Get the iteration number corresponding to the latest saved checkpoint
-
-    Args:
-        checkpoint_dir (str): the directory where checkpoint is saved.
-        rank (int, optional): the rank of the process in multi-process setting.
-            Defaults to 0.
-
-    Returns:
-        int: the latest iteration number.
-    """
-    checkpoint_path = os.path.join(checkpoint_dir, "checkpoint")
-    # Create checkpoint index file if not exist.
-    if (not os.path.isfile(checkpoint_path)) and rank == 0:
-        with open(checkpoint_path, "w") as handle:
-            handle.write("model_checkpoint_path: step-0")
-
-    # Make sure that other process waits until checkpoint file is created
-    # by process 0.
-    while not os.path.isfile(checkpoint_path):
-        time.sleep(1)
-
-    # Fetch the latest checkpoint index.
-    with open(checkpoint_path, "r") as handle:
-        latest_checkpoint = handle.readline().split()[-1]
-        iteration = int(latest_checkpoint.split("-")[-1])
-
-    return iteration
-
-
-def save_latest_checkpoint(checkpoint_dir, iteration):
-    """Save the iteration number of the latest model to be checkpointed.
-
-    Args:
-        checkpoint_dir (str): the directory where checkpoint is saved.
-        iteration (int): the latest iteration number.
-
-    Returns:
-        None
-    """
-    checkpoint_path = os.path.join(checkpoint_dir, "checkpoint")
-    # Update the latest checkpoint index.
-    with open(checkpoint_path, "w") as handle:
-        handle.write("model_checkpoint_path: step-{}".format(iteration))
-
-
-def load_parameters(checkpoint_dir,
-                    rank,
-                    model,
-                    optimizer=None,
-                    iteration=None,
-                    file_path=None,
-                    dtype="float32"):
-    """Load a specific model checkpoint from disk.
-
-    Args:
-        checkpoint_dir (str): the directory where checkpoint is saved.
-        rank (int): the rank of the process in multi-process setting.
-        model (obj): model to load parameters.
-        optimizer (obj, optional): optimizer to load states if needed.
-            Defaults to None.
-        iteration (int, optional): if specified, load the specific checkpoint,
-            if not specified, load the latest one. Defaults to None.
-        file_path (str, optional): if specified, load the checkpoint
-            stored in the file_path. Defaults to None.
-        dtype (str, optional): precision of the model parameters.
-            Defaults to float32.
-
-    Returns:
-        None
-    """
-    if file_path is None:
-        if iteration is None:
-            iteration = load_latest_checkpoint(checkpoint_dir, rank)
-        if iteration == 0:
-            return
-        file_path = "{}/step-{}".format(checkpoint_dir, iteration)
-
-    model_dict, optimizer_dict = dg.load_dygraph(file_path)
-    if dtype == "float16":
-        for k, v in model_dict.items():
-            if "conv2d_transpose" in k:
-                model_dict[k] = v.astype("float32")
-            else:
-                model_dict[k] = v.astype(dtype)
-    model.set_dict(model_dict)
-    print("[checkpoint] Rank {}: loaded model from {}".format(rank, file_path))
-    if optimizer and optimizer_dict:
-        optimizer.set_dict(optimizer_dict)
-        print("[checkpoint] Rank {}: loaded optimizer state from {}".format(
-            rank, file_path))
-
-
-def save_latest_parameters(checkpoint_dir, iteration, model, optimizer=None):
-    """Checkpoint the latest trained model parameters.
-
-    Args:
-        checkpoint_dir (str): the directory where checkpoint is saved.
-        iteration (int): the latest iteration number.
-        model (obj): model to be checkpointed.
-        optimizer (obj, optional): optimizer to be checkpointed.
-            Defaults to None.
-
-    Returns:
-        None
-    """
-    file_path = "{}/step-{}".format(checkpoint_dir, iteration)
-    model_dict = model.state_dict()
-    dg.save_dygraph(model_dict, file_path)
-    print("[checkpoint] Saved model to {}".format(file_path))
-
-    if optimizer:
-        opt_dict = optimizer.state_dict()
-        dg.save_dygraph(opt_dict, file_path)
-        print("[checkpoint] Saved optimzier state to {}".format(file_path))
diff --git a/parakeet/models/waveflow/waveflow.py b/parakeet/models/waveflow/waveflow.py
index 101bb66..faf2fb6 100644
--- a/parakeet/models/waveflow/waveflow.py
+++ b/parakeet/models/waveflow/waveflow.py
@@ -22,6 +22,7 @@ from paddle import fluid
 from scipy.io.wavfile import write
 
 import utils
+from parakeet.utils import io
 from parakeet.modules import weight_norm
 from .data import LJSpeech
 from .waveflow_modules import WaveFlowLoss, WaveFlowModule
@@ -47,6 +48,7 @@ class WaveFlow():
     Returns:
         WaveFlow
     """
+
     def __init__(self,
                  config,
                  checkpoint_dir,
@@ -91,7 +93,7 @@ class WaveFlow():
                 parameter_list=waveflow.parameters())
 
             # Load parameters.
-            utils.load_parameters(
+            io.load_parameters(
                 self.checkpoint_dir,
                 self.rank,
                 waveflow,
@@ -111,7 +113,7 @@ class WaveFlow():
 
         else:
             # Load parameters.
-            utils.load_parameters(
+            io.load_parameters(
                 self.checkpoint_dir,
                 self.rank,
                 waveflow,
@@ -291,6 +293,6 @@ class WaveFlow():
         Returns:
             None
         """
-        utils.save_latest_parameters(self.checkpoint_dir, iteration,
-                                     self.waveflow, self.optimizer)
-        utils.save_latest_checkpoint(self.checkpoint_dir, iteration)
+        io.save_latest_parameters(self.checkpoint_dir, iteration,
+                                  self.waveflow, self.optimizer)
+        io.save_latest_checkpoint(self.checkpoint_dir, iteration)
diff --git a/parakeet/utils/io.py b/parakeet/utils/io.py
new file mode 100644
index 0000000..e612400
--- /dev/null
+++ b/parakeet/utils/io.py
@@ -0,0 +1,158 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import time
+
+import ruamel.yaml
+import numpy as np
+import paddle.fluid.dygraph as dg
+
+
+def add_yaml_config_to_args(config):
+    """ Add args in yaml config to the args parsed by argparse. The argument in 
+        yaml config is overridden by the same argument from argparse when the
+        argparse value is not None.
+    
+    Args:
+        config (args): the args returned by `argparse.ArgumentParser().parse_args()`
+
+    Returns:
+        config: the parsed args, updated with the values from the yaml config.
+    """
+    with open(config.config, 'rt') as f:
+        yaml_cfg = ruamel.yaml.safe_load(f)
+    cfg_vars = vars(config)
+    for k, v in yaml_cfg.items():
+        if k in cfg_vars and cfg_vars[k] is not None:
+            continue
+        cfg_vars[k] = v
+    return config
+
+
+def load_latest_checkpoint(checkpoint_dir, rank=0):
+    """Get the iteration number corresponding to the latest saved checkpoint
+
+    Args:
+        checkpoint_dir (str): the directory where checkpoint is saved.
+        rank (int, optional): the rank of the process in multi-process setting.
+            Defaults to 0.
+
+    Returns:
+        int: the latest iteration number.
+    """
+    checkpoint_path = os.path.join(checkpoint_dir, "checkpoint")
+    # Create checkpoint index file if not exist.
+    if (not os.path.isfile(checkpoint_path)) and rank == 0:
+        with open(checkpoint_path, "w") as handle:
+            handle.write("model_checkpoint_path: step-0")
+
+    # Make sure that other process waits until checkpoint file is created
+    # by process 0.
+    while not os.path.isfile(checkpoint_path):
+        time.sleep(1)
+
+    # Fetch the latest checkpoint index.
+    with open(checkpoint_path, "r") as handle:
+        latest_checkpoint = handle.readline().split()[-1]
+        iteration = int(latest_checkpoint.split("-")[-1])
+
+    return iteration
+
+
+def save_latest_checkpoint(checkpoint_dir, iteration):
+    """Save the iteration number of the latest model to be checkpointed.
+
+    Args:
+        checkpoint_dir (str): the directory where checkpoint is saved.
+        iteration (int): the latest iteration number.
+
+    Returns:
+        None
+    """
+    checkpoint_path = os.path.join(checkpoint_dir, "checkpoint")
+    # Update the latest checkpoint index.
+    with open(checkpoint_path, "w") as handle:
+        handle.write("model_checkpoint_path: step-{}".format(iteration))
+
+
+def load_parameters(checkpoint_dir,
+                    rank,
+                    model,
+                    optimizer=None,
+                    iteration=None,
+                    file_path=None,
+                    dtype="float32"):
+    """Load a specific model checkpoint from disk.
+
+    Args:
+        checkpoint_dir (str): the directory where checkpoint is saved.
+        rank (int): the rank of the process in multi-process setting.
+        model (obj): model to load parameters.
+        optimizer (obj, optional): optimizer to load states if needed.
+            Defaults to None.
+        iteration (int, optional): if specified, load the specific checkpoint,
+            if not specified, load the latest one. Defaults to None.
+        file_path (str, optional): if specified, load the checkpoint
+            stored in the file_path. Defaults to None.
+        dtype (str, optional): precision of the model parameters.
+            Defaults to float32.
+
+    Returns:
+        None
+    """
+    if file_path is None:
+        if iteration is None:
+            iteration = load_latest_checkpoint(checkpoint_dir, rank)
+        if iteration == 0:
+            return
+        file_path = "{}/step-{}".format(checkpoint_dir, iteration)
+
+    model_dict, optimizer_dict = dg.load_dygraph(file_path)
+    if dtype == "float16":
+        for k, v in model_dict.items():
+            if "conv2d_transpose" in k:
+                model_dict[k] = v.astype("float32")
+            else:
+                model_dict[k] = v.astype(dtype)
+    model.set_dict(model_dict)
+    print("[checkpoint] Rank {}: loaded model from {}".format(rank, file_path))
+    if optimizer and optimizer_dict:
+        optimizer.set_dict(optimizer_dict)
+        print("[checkpoint] Rank {}: loaded optimizer state from {}".format(
+            rank, file_path))
+
+
+def save_latest_parameters(checkpoint_dir, iteration, model, optimizer=None):
+    """Checkpoint the latest trained model parameters.
+
+    Args:
+        checkpoint_dir (str): the directory where checkpoint is saved.
+        iteration (int): the latest iteration number.
+        model (obj): model to be checkpointed.
+        optimizer (obj, optional): optimizer to be checkpointed.
+            Defaults to None.
+
+    Returns:
+        None
+    """
+    file_path = "{}/step-{}".format(checkpoint_dir, iteration)
+    model_dict = model.state_dict()
+    dg.save_dygraph(model_dict, file_path)
+    print("[checkpoint] Saved model to {}".format(file_path))
+
+    if optimizer:
+        opt_dict = optimizer.state_dict()
+        dg.save_dygraph(opt_dict, file_path)
+        print("[checkpoint] Saved optimzier state to {}".format(file_path))

From 776743530a16e41788fd2b0300ba2de3ecce6859 Mon Sep 17 00:00:00 2001
From: chenfeiyu <chenfeiyu@baidu.com>
Date: Tue, 24 Mar 2020 08:53:40 +0000
Subject: [PATCH 06/11] update save & load for deep voice 3, wavenet and
 clarinet, remove the concept of epoch in training

---
 examples/clarinet/README.md               |  22 ++-
 examples/clarinet/synthesis.py            |   2 +-
 examples/clarinet/train.py                | 119 +++++++-----
 examples/deepvoice3/README.md             |  23 ++-
 examples/deepvoice3/configs/ljspeech.yaml |   2 +-
 examples/deepvoice3/synthesis.py          |   7 +-
 examples/deepvoice3/train.py              | 218 +++++++++++-----------
 examples/wavenet/README.md                |  18 +-
 examples/wavenet/synthesis.py             |   5 +-
 examples/wavenet/train.py                 |  82 ++++----
 examples/wavenet/utils.py                 |   7 -
 11 files changed, 272 insertions(+), 233 deletions(-)

diff --git a/examples/clarinet/README.md b/examples/clarinet/README.md
index 459e2f5..9b79897 100644
--- a/examples/clarinet/README.md
+++ b/examples/clarinet/README.md
@@ -28,24 +28,24 @@ Train the model using train.py, follow the usage displayed by `python train.py -
 
 ```text
 usage: train.py [-h] [--config CONFIG] [--device DEVICE] [--output OUTPUT]
-                [--data DATA] [--resume RESUME] [--wavenet WAVENET]
+                [--data DATA] [--checkpoint CHECKPOINT] [--wavenet WAVENET]
 
 train a ClariNet model with LJspeech and a trained WaveNet model.
 
 optional arguments:
-  -h, --help         show this help message and exit
-  --config CONFIG    path of the config file.
-  --device DEVICE    device to use.
-  --output OUTPUT    path to save student.
-  --data DATA        path of LJspeech dataset.
-  --resume RESUME    checkpoint to load from.
-  --wavenet WAVENET  wavenet checkpoint to use.
+  -h, --help                show this help message and exit
+  --config CONFIG           path of the config file.
+  --device DEVICE           device to use.
+  --output OUTPUT           path to save student.
+  --data DATA               path of LJspeech dataset.
+  --checkpoint CHECKPOINT   checkpoint to load from.
+  --wavenet WAVENET         wavenet checkpoint to use.
 ```
 
 - `--config` is the configuration file to use. The provided configurations can be used directly. And you can change some values in the configuration file and train the model with a different config.
 - `--data` is the path of the LJSpeech dataset, the extracted folder from the downloaded archive (the folder which contains metadata.txt).
-- `--resume` is the path of the checkpoint. If it is provided, the model would load the checkpoint before trainig.
-- `--output` is the directory to save results, all result are saved in this directory. The structure of the output directory is shown below.
+- `--checkpoint` is the path of the checkpoint. If it is provided, the model loads the checkpoint before training.
+- `--output` is the directory to save results, all results are saved in this directory. The structure of the output directory is shown below.  
 
 ```text
 ├── checkpoints      # checkpoint
@@ -53,6 +53,8 @@ optional arguments:
 └── log              # tensorboard log
 ```
 
+If `checkpoints` is not empty and argument `--checkpoint` is not specified, the model will be resumed from the latest checkpoint at the beginning of training.
+
 - `--device` is the device (gpu id) to use for training. `-1` means CPU.
 - `--wavenet` is the path of the wavenet checkpoint to load. If you do not specify `--resume`, then this must be provided.
 
diff --git a/examples/clarinet/synthesis.py b/examples/clarinet/synthesis.py
index db12035..ce16fc1 100644
--- a/examples/clarinet/synthesis.py
+++ b/examples/clarinet/synthesis.py
@@ -31,7 +31,7 @@ from parakeet.models.clarinet import STFT, Clarinet, ParallelWaveNet
 from parakeet.data import TransformDataset, SliceDataset, RandomSampler, SequentialSampler, DataCargo
 from parakeet.utils.layer_tools import summary, freeze
 
-from utils import valid_model, eval_model, save_checkpoint, load_checkpoint, load_model
+from utils import valid_model, eval_model, load_model
 sys.path.append("../wavenet")
 from data import LJSpeechMetaData, Transform, DataCollector
 
diff --git a/examples/clarinet/train.py b/examples/clarinet/train.py
index c6039b3..dcfff9b 100644
--- a/examples/clarinet/train.py
+++ b/examples/clarinet/train.py
@@ -30,14 +30,15 @@ from parakeet.models.wavenet import WaveNet, UpsampleNet
 from parakeet.models.clarinet import STFT, Clarinet, ParallelWaveNet
 from parakeet.data import TransformDataset, SliceDataset, RandomSampler, SequentialSampler, DataCargo
 from parakeet.utils.layer_tools import summary, freeze
+from parakeet.utils import io
 
-from utils import make_output_tree, valid_model, save_checkpoint, load_checkpoint, load_wavenet
+from utils import make_output_tree, valid_model, load_wavenet
 sys.path.append("../wavenet")
 from data import LJSpeechMetaData, Transform, DataCollector
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
-        description="train a clarinet model with LJspeech and a trained wavenet model."
+        description="train a ClariNet model with LJspeech and a trained WaveNet model."
     )
     parser.add_argument("--config", type=str, help="path of the config file.")
     parser.add_argument(
@@ -48,13 +49,18 @@ if __name__ == "__main__":
         default="experiment",
         help="path to save student.")
     parser.add_argument("--data", type=str, help="path of LJspeech dataset.")
-    parser.add_argument("--resume", type=str, help="checkpoint to load from.")
+    parser.add_argument(
+        "--checkpoint", type=str, help="checkpoint to load from.")
     parser.add_argument(
         "--wavenet", type=str, help="wavenet checkpoint to use.")
     args = parser.parse_args()
     with open(args.config, 'rt') as f:
         config = ruamel.yaml.safe_load(f)
 
+    print("Command Line args: ")
+    for k, v in vars(args).items():
+        print("{}: {}".format(k, v))
+
     ljspeech_meta = LJSpeechMetaData(args.data)
 
     data_config = config["data"]
@@ -154,12 +160,38 @@ if __name__ == "__main__":
         clipper = fluid.dygraph_grad_clip.GradClipByGlobalNorm(
             gradiant_max_norm)
 
-        assert args.wavenet or args.resume, "you should load from a trained wavenet or resume training; training without a trained wavenet is not recommended."
-        if args.wavenet:
+        # train
+        max_iterations = train_config["max_iterations"]
+        checkpoint_interval = train_config["checkpoint_interval"]
+        eval_interval = train_config["eval_interval"]
+        checkpoint_dir = os.path.join(args.output, "checkpoints")
+        state_dir = os.path.join(args.output, "states")
+        log_dir = os.path.join(args.output, "log")
+        writer = SummaryWriter(log_dir)
+
+        # load wavenet/checkpoint, determine iterations done
+        if args.checkpoint is not None:
+            iteration = int(os.path.basename(args.checkpoint).split('-')[-1])
+        else:
+            iteration = io.load_latest_checkpoint(checkpoint_dir)
+
+        if iteration == 0 and args.wavenet is None:
+            raise Exception(
+                "you should load from a trained wavenet or resume training; training without a trained wavenet is not recommended."
+            )
+
+        if args.wavenet is not None and iteration > 0:
+            if args.checkpoint is None:
+                print("Resume training, --wavenet ignored")
+            else:
+                print("--checkpoint provided, --wavenet ignored")
+
+        if args.wavenet is not None and iteration == 0:
             load_wavenet(model, args.wavenet)
 
-        if args.resume:
-            load_checkpoint(model, optim, args.resume)
+        # it may overwrite the wavenet loaded
+        io.load_parameters(
+            checkpoint_dir, 0, model, optim, file_path=args.checkpoint)
 
         # loader
         train_loader = fluid.io.DataLoader.from_generator(
@@ -170,52 +202,43 @@ if __name__ == "__main__":
             capacity=10, return_list=True)
         valid_loader.set_batch_generator(valid_cargo, place)
 
-        # train
-        max_iterations = train_config["max_iterations"]
-        checkpoint_interval = train_config["checkpoint_interval"]
-        eval_interval = train_config["eval_interval"]
-        checkpoint_dir = os.path.join(args.output, "checkpoints")
-        state_dir = os.path.join(args.output, "states")
-        log_dir = os.path.join(args.output, "log")
-        writer = SummaryWriter(log_dir)
-
         # training loop
-        global_step = 1
-        global_epoch = 1
+        global_step = iteration + 1
+        iterator = iter(tqdm(train_loader))
         while global_step < max_iterations:
-            epoch_loss = 0.
-            for j, batch in tqdm(enumerate(train_loader), desc="[train]"):
-                audios, mels, audio_starts = batch
-                model.train()
-                loss_dict = model(
-                    audios, mels, audio_starts, clip_kl=global_step > 500)
+            try:
+                batch = next(iterator)
+            except StopIteration as e:
+                iterator = iter(tqdm(train_loader))
+                batch = next(iterator)
 
-                writer.add_scalar("learning_rate",
-                                  optim._learning_rate.step().numpy()[0],
-                                  global_step)
-                for k, v in loss_dict.items():
-                    writer.add_scalar("loss/{}".format(k),
-                                      v.numpy()[0], global_step)
+            audios, mels, audio_starts = batch
+            model.train()
+            loss_dict = model(
+                audios, mels, audio_starts, clip_kl=global_step > 500)
 
-                l = loss_dict["loss"]
-                step_loss = l.numpy()[0]
-                print("[train] loss: {:<8.6f}".format(step_loss))
-                epoch_loss += step_loss
+            writer.add_scalar("learning_rate",
+                              optim._learning_rate.step().numpy()[0],
+                              global_step)
+            for k, v in loss_dict.items():
+                writer.add_scalar("loss/{}".format(k),
+                                  v.numpy()[0], global_step)
 
-                l.backward()
-                optim.minimize(l, grad_clip=clipper)
-                optim.clear_gradients()
+            l = loss_dict["loss"]
+            step_loss = l.numpy()[0]
+            print("[train] loss: {:<8.6f}".format(step_loss))
 
-                if global_step % eval_interval == 0:
-                    # evaluate on valid dataset
-                    valid_model(model, valid_loader, state_dir, global_step,
-                                sample_rate)
-                if global_step % checkpoint_interval == 0:
-                    save_checkpoint(model, optim, checkpoint_dir, global_step)
+            l.backward()
+            optim.minimize(l, grad_clip=clipper)
+            optim.clear_gradients()
 
-                global_step += 1
+            if global_step % eval_interval == 0:
+                # evaluate on valid dataset
+                valid_model(model, valid_loader, state_dir, global_step,
+                            sample_rate)
+            if global_step % checkpoint_interval == 0:
+                io.save_latest_parameters(checkpoint_dir, global_step, model,
+                                          optim)
+                io.save_latest_checkpoint(checkpoint_dir, global_step)
 
-            # epoch loss
-            average_loss = epoch_loss / j
-            writer.add_scalar("average_loss", average_loss, global_epoch)
-            global_epoch += 1
+            global_step += 1
diff --git a/examples/deepvoice3/README.md b/examples/deepvoice3/README.md
index fa7a5e4..7c2ad77 100644
--- a/examples/deepvoice3/README.md
+++ b/examples/deepvoice3/README.md
@@ -35,26 +35,23 @@ The model consists of an encoder, a decoder and a converter (and a speaker embed
 Train the model using train.py, follow the usage displayed by `python train.py --help`.
 
 ```text
-usage: train.py [-h] [-c CONFIG] [-s DATA] [-r RESUME] [-o OUTPUT] [-g DEVICE]
+usage: train.py [-h] [-c CONFIG] [-s DATA] [--checkpoint CHECKPOINT]
+                [-o OUTPUT] [-g DEVICE]
 
 Train a Deep Voice 3 model with LJSpeech dataset.
 
 optional arguments:
-  -h, --help            show this help message and exit
-  -c CONFIG, --config CONFIG
-                        experimrnt config
-  -s DATA, --data DATA  The path of the LJSpeech dataset.
-  -r RESUME, --resume RESUME
-                        checkpoint to load
-  -o OUTPUT, --output OUTPUT
-                        The directory to save result.
-  -g DEVICE, --device DEVICE
-                        device to use
+  -h, --help                      show this help message and exit
+  -c CONFIG, --config CONFIG      experimrnt config
+  -s DATA, --data DATA            The path of the LJSpeech dataset.
+  --checkpoint CHECKPOINT         checkpoint to load
+  -o OUTPUT, --output OUTPUT      The directory to save result.
+  -g DEVICE, --device DEVICE      device to use
 ```
 
 - `--config` is the configuration file to use. The provided `ljspeech.yaml` can be used directly. And you can change some values in the configuration file and train the model with a different config.
 - `--data` is the path of the LJSpeech dataset, the extracted folder from the downloaded archive (the folder which contains metadata.txt).
-- `--resume` is the path of the checkpoint. If it is provided, the model would load the checkpoint before trainig.
+- `--checkpoint` is the path of the checkpoint. If it is provided, the model would load the checkpoint before training.
 - `--output` is the directory to save results, all results are saved in this directory. The structure of the output directory is shown below.
 
 ```text
@@ -67,6 +64,8 @@ optional arguments:
     └── waveform     # waveform (.wav files)
 ```
 
+If the `checkpoints` directory is not empty and the `--checkpoint` argument is not specified, training is resumed from the latest checkpoint in that directory.
+
 - `--device` is the device (gpu id) to use for training. `-1` means CPU.
 
 Example script:
diff --git a/examples/deepvoice3/configs/ljspeech.yaml b/examples/deepvoice3/configs/ljspeech.yaml
index 8aa6b5a..b270719 100644
--- a/examples/deepvoice3/configs/ljspeech.yaml
+++ b/examples/deepvoice3/configs/ljspeech.yaml
@@ -83,7 +83,7 @@ lr_scheduler:
   
 train:
   batch_size: 16
-  epochs: 2000
+  max_iteration: 2000000
   
   snap_interval: 1000
   eval_interval: 10000
diff --git a/examples/deepvoice3/synthesis.py b/examples/deepvoice3/synthesis.py
index 0631dae..d3cd9f0 100644
--- a/examples/deepvoice3/synthesis.py
+++ b/examples/deepvoice3/synthesis.py
@@ -25,8 +25,9 @@ import paddle.fluid.dygraph as dg
 from tensorboardX import SummaryWriter
 
 from parakeet.g2p import en
-from parakeet.utils.layer_tools import summary
 from parakeet.modules.weight_norm import WeightNormWrapper
+from parakeet.utils.layer_tools import summary
+from parakeet.utils.io import load_parameters
 
 from utils import make_model, eval_model, plot_alignment
 
@@ -44,6 +45,10 @@ if __name__ == "__main__":
     with open(args.config, 'rt') as f:
         config = ruamel.yaml.safe_load(f)
 
+    print("Command Line Args: ")
+    for k, v in vars(args).items():
+        print("{}: {}".format(k, v))
+
     if args.device == -1:
         place = fluid.CPUPlace()
     else:
diff --git a/examples/deepvoice3/train.py b/examples/deepvoice3/train.py
index 11f8407..6e0a9ba 100644
--- a/examples/deepvoice3/train.py
+++ b/examples/deepvoice3/train.py
@@ -17,6 +17,8 @@ import os
 import argparse
 import ruamel.yaml
 import numpy as np
+import matplotlib
+matplotlib.use("agg")
 from matplotlib import cm
 import matplotlib.pyplot as plt
 import tqdm
@@ -35,13 +37,14 @@ from parakeet.data import DataCargo, PartialyRandomizedSimilarTimeLengthSampler,
 from parakeet.models.deepvoice3 import Encoder, Decoder, Converter, DeepVoice3, ConvSpec
 from parakeet.models.deepvoice3.loss import TTSLoss
 from parakeet.utils.layer_tools import summary
+from parakeet.utils import io
 
 from data import LJSpeechMetaData, DataCollector, Transform
 from utils import make_model, eval_model, save_state, make_output_tree, plot_alignment
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
-        description="Train a deepvoice 3 model with LJSpeech dataset.")
+        description="Train a Deep Voice 3 model with LJSpeech dataset.")
     parser.add_argument("-c", "--config", type=str, help="experimrnt config")
     parser.add_argument(
         "-s",
@@ -49,7 +52,7 @@ if __name__ == "__main__":
         type=str,
         default="/workspace/datasets/LJSpeech-1.1/",
         help="The path of the LJSpeech dataset.")
-    parser.add_argument("-r", "--resume", type=str, help="checkpoint to load")
+    parser.add_argument("--checkpoint", type=str, help="checkpoint to load")
     parser.add_argument(
         "-o",
         "--output",
@@ -62,6 +65,10 @@ if __name__ == "__main__":
     with open(args.config, 'rt') as f:
         config = ruamel.yaml.safe_load(f)
 
+    print("Command Line Args: ")
+    for k, v in vars(args).items():
+        print("{}: {}".format(k, v))
+
     # =========================dataset=========================
     # construct meta data
     data_root = args.data
@@ -151,6 +158,7 @@ if __name__ == "__main__":
             query_position_rate, key_position_rate, window_backward,
             window_ahead, key_projection, value_projection, downsample_factor,
             linear_dim, use_decoder_states, converter_channels, dropout)
+        summary(dv3)
 
         # =========================loss=========================
         loss_config = config["loss"]
@@ -195,7 +203,6 @@ if __name__ == "__main__":
         n_iter = synthesis_config["n_iter"]
 
         # =========================link(dataloader, paddle)=========================
-        # CAUTION: it does not return a DataLoader
         loader = fluid.io.DataLoader.from_generator(
             capacity=10, return_list=True)
         loader.set_batch_generator(ljspeech_loader, places=place)
@@ -208,122 +215,117 @@ if __name__ == "__main__":
         make_output_tree(output_dir)
         writer = SummaryWriter(logdir=log_dir)
 
-        # load model parameters
-        resume_path = args.resume
-        if resume_path is not None:
-            state, _ = dg.load_dygraph(args.resume)
-            dv3.set_dict(state)
+        # load parameters and optimizer, and update iterations done so far
+        io.load_parameters(ckpt_dir, 0, dv3, optim, file_path=args.checkpoint)
+        if args.checkpoint is not None:
+            iteration = int(os.path.basename(args.checkpoint).split("-")[-1])
+        else:
+            iteration = io.load_latest_checkpoint(ckpt_dir)
 
         # =========================train=========================
-        epoch = train_config["epochs"]
+        max_iter = train_config["max_iteration"]
         snap_interval = train_config["snap_interval"]
         save_interval = train_config["save_interval"]
         eval_interval = train_config["eval_interval"]
 
-        global_step = 1
+        global_step = iteration + 1
+        iterator = iter(tqdm.tqdm(loader))
+        while global_step <= max_iter:
+            try:
+                batch = next(iterator)
+            except StopIteration as e:
+                iterator = iter(tqdm.tqdm(loader))
+                batch = next(iterator)
 
-        for j in range(1, 1 + epoch):
-            epoch_loss = 0.
-            for i, batch in tqdm.tqdm(enumerate(loader, 1)):
-                dv3.train()  # CAUTION: don't forget to switch to train
-                (text_sequences, text_lengths, text_positions, mel_specs,
-                 lin_specs, frames, decoder_positions, done_flags) = batch
-                downsampled_mel_specs = F.strided_slice(
-                    mel_specs,
-                    axes=[1],
-                    starts=[0],
-                    ends=[mel_specs.shape[1]],
-                    strides=[downsample_factor])
-                mel_outputs, linear_outputs, alignments, done = dv3(
-                    text_sequences, text_positions, text_lengths, None,
-                    downsampled_mel_specs, decoder_positions)
+            dv3.train()
+            (text_sequences, text_lengths, text_positions, mel_specs,
+             lin_specs, frames, decoder_positions, done_flags) = batch
+            downsampled_mel_specs = F.strided_slice(
+                mel_specs,
+                axes=[1],
+                starts=[0],
+                ends=[mel_specs.shape[1]],
+                strides=[downsample_factor])
+            mel_outputs, linear_outputs, alignments, done = dv3(
+                text_sequences, text_positions, text_lengths, None,
+                downsampled_mel_specs, decoder_positions)
 
-                losses = criterion(mel_outputs, linear_outputs, done,
-                                   alignments, downsampled_mel_specs,
-                                   lin_specs, done_flags, text_lengths, frames)
-                l = losses["loss"]
-                l.backward()
-                # record learning rate before updating
-                writer.add_scalar("learning_rate",
-                                  optim._learning_rate.step().numpy(),
-                                  global_step)
-                optim.minimize(l, grad_clip=gradient_clipper)
-                optim.clear_gradients()
+            losses = criterion(mel_outputs, linear_outputs, done, alignments,
+                               downsampled_mel_specs, lin_specs, done_flags,
+                               text_lengths, frames)
+            l = losses["loss"]
+            l.backward()
+            # record learning rate before updating
+            writer.add_scalar("learning_rate",
+                              optim._learning_rate.step().numpy(), global_step)
+            optim.minimize(l, grad_clip=gradient_clipper)
+            optim.clear_gradients()
 
-                # ==================all kinds of tedious things=================
-                # record step loss into tensorboard
-                epoch_loss += l.numpy()[0]
-                step_loss = {k: v.numpy()[0] for k, v in losses.items()}
-                for k, v in step_loss.items():
-                    writer.add_scalar(k, v, global_step)
+            # ==================all kinds of tedious things=================
+            # record step loss into tensorboard
+            step_loss = {k: v.numpy()[0] for k, v in losses.items()}
+            tqdm.tqdm.write("global_step: {}\tloss: {}".format(
+                global_step, step_loss["loss"]))
+            for k, v in step_loss.items():
+                writer.add_scalar(k, v, global_step)
 
-                # TODO: clean code
-                # train state saving, the first sentence in the batch
-                if global_step % snap_interval == 0:
-                    save_state(
-                        state_dir,
-                        writer,
+            # train state saving, the first sentence in the batch
+            if global_step % snap_interval == 0:
+                save_state(
+                    state_dir,
+                    writer,
+                    global_step,
+                    mel_input=downsampled_mel_specs,
+                    mel_output=mel_outputs,
+                    lin_input=lin_specs,
+                    lin_output=linear_outputs,
+                    alignments=alignments,
+                    win_length=win_length,
+                    hop_length=hop_length,
+                    min_level_db=min_level_db,
+                    ref_level_db=ref_level_db,
+                    power=power,
+                    n_iter=n_iter,
+                    preemphasis=preemphasis,
+                    sample_rate=sample_rate)
+
+            # evaluation
+            if global_step % eval_interval == 0:
+                sentences = [
+                    "Scientists at the CERN laboratory say they have discovered a new particle.",
+                    "There's a way to measure the acute emotional intelligence that has never gone out of style.",
+                    "President Trump met with other leaders at the Group of 20 conference.",
+                    "Generative adversarial network or variational auto-encoder.",
+                    "Please call Stella.",
+                    "Some have accepted this as a miracle without any physical explanation.",
+                ]
+                for idx, sent in enumerate(sentences):
+                    wav, attn = eval_model(
+                        dv3, sent, replace_pronounciation_prob, min_level_db,
+                        ref_level_db, power, n_iter, win_length, hop_length,
+                        preemphasis)
+                    wav_path = os.path.join(
+                        state_dir, "waveform",
+                        "eval_sample_{:09d}.wav".format(global_step))
+                    sf.write(wav_path, wav, sample_rate)
+                    writer.add_audio(
+                        "eval_sample_{}".format(idx),
+                        wav,
                         global_step,
-                        mel_input=downsampled_mel_specs,
-                        mel_output=mel_outputs,
-                        lin_input=lin_specs,
-                        lin_output=linear_outputs,
-                        alignments=alignments,
-                        win_length=win_length,
-                        hop_length=hop_length,
-                        min_level_db=min_level_db,
-                        ref_level_db=ref_level_db,
-                        power=power,
-                        n_iter=n_iter,
-                        preemphasis=preemphasis,
                         sample_rate=sample_rate)
+                    attn_path = os.path.join(
+                        state_dir, "alignments",
+                        "eval_sample_attn_{:09d}.png".format(global_step))
+                    plot_alignment(attn, attn_path)
+                    writer.add_image(
+                        "eval_sample_attn{}".format(idx),
+                        cm.viridis(attn),
+                        global_step,
+                        dataformats="HWC")
 
-                # evaluation
-                if global_step % eval_interval == 0:
-                    sentences = [
-                        "Scientists at the CERN laboratory say they have discovered a new particle.",
-                        "There's a way to measure the acute emotional intelligence that has never gone out of style.",
-                        "President Trump met with other leaders at the Group of 20 conference.",
-                        "Generative adversarial network or variational auto-encoder.",
-                        "Please call Stella.",
-                        "Some have accepted this as a miracle without any physical explanation.",
-                    ]
-                    for idx, sent in enumerate(sentences):
-                        wav, attn = eval_model(
-                            dv3, sent, replace_pronounciation_prob,
-                            min_level_db, ref_level_db, power, n_iter,
-                            win_length, hop_length, preemphasis)
-                        wav_path = os.path.join(
-                            state_dir, "waveform",
-                            "eval_sample_{:09d}.wav".format(global_step))
-                        sf.write(wav_path, wav, sample_rate)
-                        writer.add_audio(
-                            "eval_sample_{}".format(idx),
-                            wav,
-                            global_step,
-                            sample_rate=sample_rate)
-                        attn_path = os.path.join(
-                            state_dir, "alignments",
-                            "eval_sample_attn_{:09d}.png".format(global_step))
-                        plot_alignment(attn, attn_path)
-                        writer.add_image(
-                            "eval_sample_attn{}".format(idx),
-                            cm.viridis(attn),
-                            global_step,
-                            dataformats="HWC")
+            # save checkpoint
+            if global_step % save_interval == 0:
+                io.save_latest_parameters(ckpt_dir, global_step, dv3, optim)
+                io.save_latest_checkpoint(ckpt_dir, global_step)
 
-                # save checkpoint
-                if global_step % save_interval == 0:
-                    dg.save_dygraph(
-                        dv3.state_dict(),
-                        os.path.join(ckpt_dir,
-                                     "model_step_{}".format(global_step)))
-                    dg.save_dygraph(
-                        optim.state_dict(),
-                        os.path.join(ckpt_dir,
-                                     "model_step_{}".format(global_step)))
-
-                global_step += 1
-            # epoch report
-            writer.add_scalar("epoch_average_loss", epoch_loss / i, j)
-            epoch_loss = 0.
+            global_step += 1
diff --git a/examples/wavenet/README.md b/examples/wavenet/README.md
index 5114182..af34457 100644
--- a/examples/wavenet/README.md
+++ b/examples/wavenet/README.md
@@ -28,22 +28,22 @@ Train the model using train.py. For help on usage, try `python train.py --help`.
 
 ```text
 usage: train.py [-h] [--data DATA] [--config CONFIG] [--output OUTPUT]
-                [--device DEVICE] [--resume RESUME]
+                [--device DEVICE] [--checkpoint CHECKPOINT]
 
 Train a WaveNet model with LJSpeech.
 
 optional arguments:
-  -h, --help       show this help message and exit
-  --data DATA      path of the LJspeech dataset.
-  --config CONFIG  path of the config file.
-  --output OUTPUT  path to save results.
-  --device DEVICE  device to use.
-  --resume RESUME  checkpoint to resume from.
+  -h, --help                show this help message and exit
+  --data DATA               path of the LJspeech dataset.
+  --config CONFIG           path of the config file.
+  --output OUTPUT           path to save results.
+  --device DEVICE           device to use.
+  --checkpoint CHECKPOINT   checkpoint to resume from.
 ```
 
 - `--config` is the configuration file to use. The provided configurations can be used directly. And you can change some values in the configuration file and train the model with a different config.
 - `--data` is the path of the LJSpeech dataset, the extracted folder from the downloaded archive (the folder which contains metadata.txt).
-- `--resume` is the path of the checkpoint. If it is provided, the model would load the checkpoint before training.
+- `--checkpoint` is the path of the checkpoint. If it is provided, the model would load the checkpoint before training.
 - `--output` is the directory to save results, all result are saved in this directory. The structure of the output directory is shown below.
 
 ```text
@@ -51,6 +51,8 @@ optional arguments:
 └── log              # tensorboard log
 ```
 
+If the `checkpoints` directory is not empty and the `--checkpoint` argument is not specified, training is resumed from the latest checkpoint in that directory.
+
 - `--device` is the device (gpu id) to use for training. `-1` means CPU.
 
 Example script:
diff --git a/examples/wavenet/synthesis.py b/examples/wavenet/synthesis.py
index f3d4c93..5edb1ed 100644
--- a/examples/wavenet/synthesis.py
+++ b/examples/wavenet/synthesis.py
@@ -27,7 +27,7 @@ from parakeet.models.wavenet import UpsampleNet, WaveNet, ConditionalWavenet
 from parakeet.utils.layer_tools import summary
 
 from data import LJSpeechMetaData, Transform, DataCollector
-from utils import make_output_tree, valid_model, eval_model, save_checkpoint
+from utils import make_output_tree, valid_model, eval_model
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
@@ -87,7 +87,8 @@ if __name__ == "__main__":
         batch_size=1,
         sampler=SequentialSampler(ljspeech_valid))
 
-    make_output_tree(args.output)
+    if not os.path.exists(args.output):
+        os.makedirs(args.output)
 
     if args.device == -1:
         place = fluid.CPUPlace()
diff --git a/examples/wavenet/train.py b/examples/wavenet/train.py
index df24b10..3fdfaeb 100644
--- a/examples/wavenet/train.py
+++ b/examples/wavenet/train.py
@@ -16,7 +16,7 @@ from __future__ import division
 import os
 import ruamel.yaml
 import argparse
-from tqdm import tqdm
+import tqdm
 from tensorboardX import SummaryWriter
 from paddle import fluid
 import paddle.fluid.dygraph as dg
@@ -24,13 +24,14 @@ import paddle.fluid.dygraph as dg
 from parakeet.data import SliceDataset, TransformDataset, DataCargo, SequentialSampler, RandomSampler
 from parakeet.models.wavenet import UpsampleNet, WaveNet, ConditionalWavenet
 from parakeet.utils.layer_tools import summary
+from parakeet.utils import io
 
 from data import LJSpeechMetaData, Transform, DataCollector
-from utils import make_output_tree, valid_model, save_checkpoint
+from utils import make_output_tree, valid_model
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
-        description="Train a wavenet model with LJSpeech.")
+        description="Train a WaveNet model with LJSpeech.")
     parser.add_argument(
         "--data", type=str, help="path of the LJspeech dataset.")
     parser.add_argument("--config", type=str, help="path of the config file.")
@@ -42,12 +43,16 @@ if __name__ == "__main__":
     parser.add_argument(
         "--device", type=int, default=-1, help="device to use.")
     parser.add_argument(
-        "--resume", type=str, help="checkpoint to resume from.")
+        "--checkpoint", type=str, help="checkpoint to resume from.")
 
     args = parser.parse_args()
     with open(args.config, 'rt') as f:
         config = ruamel.yaml.safe_load(f)
 
+    print("Command Line Args: ")
+    for k, v in vars(args).items():
+        print("{}: {}".format(k, v))
+
     ljspeech_meta = LJSpeechMetaData(args.data)
 
     data_config = config["data"]
@@ -126,14 +131,6 @@ if __name__ == "__main__":
         clipper = fluid.dygraph_grad_clip.GradClipByGlobalNorm(
             gradiant_max_norm)
 
-        if args.resume:
-            model_dict, optim_dict = dg.load_dygraph(args.resume)
-            print("Loading from {}.pdparams".format(args.resume))
-            model.set_dict(model_dict)
-            if optim_dict:
-                optim.set_dict(optim_dict)
-                print("Loading from {}.pdopt".format(args.resume))
-
         train_loader = fluid.io.DataLoader.from_generator(
             capacity=10, return_list=True)
         train_loader.set_batch_generator(train_cargo, place)
@@ -150,33 +147,48 @@ if __name__ == "__main__":
         log_dir = os.path.join(args.output, "log")
         writer = SummaryWriter(log_dir)
 
-        global_step = 1
+        # load parameters and optimizer, and update iterations done so far
+        io.load_parameters(
+            checkpoint_dir, 0, model, optim, file_path=args.checkpoint)
+        if args.checkpoint is not None:
+            iteration = int(os.path.basename(args.checkpoint).split("-")[-1])
+        else:
+            iteration = io.load_latest_checkpoint(checkpoint_dir)
+
+        global_step = iteration + 1
+        iterator = iter(tqdm.tqdm(train_loader))
         while global_step <= max_iterations:
-            epoch_loss = 0.
-            for i, batch in tqdm(enumerate(train_loader)):
-                audio_clips, mel_specs, audio_starts = batch
+            print(global_step)
+            try:
+                batch = next(iterator)
+            except StopIteration as e:
+                iterator = iter(tqdm.tqdm(train_loader))
+                batch = next(iterator)
 
-                model.train()
-                y_var = model(audio_clips, mel_specs, audio_starts)
-                loss_var = model.loss(y_var, audio_clips)
-                loss_var.backward()
-                loss_np = loss_var.numpy()
+            audio_clips, mel_specs, audio_starts = batch
 
-                epoch_loss += loss_np[0]
+            model.train()
+            y_var = model(audio_clips, mel_specs, audio_starts)
+            loss_var = model.loss(y_var, audio_clips)
+            loss_var.backward()
+            loss_np = loss_var.numpy()
 
-                writer.add_scalar("loss", loss_np[0], global_step)
-                writer.add_scalar("learning_rate",
-                                  optim._learning_rate.step().numpy()[0],
-                                  global_step)
-                optim.minimize(loss_var, grad_clip=clipper)
-                optim.clear_gradients()
-                print("loss: {:<8.6f}".format(loss_np[0]))
+            writer.add_scalar("loss", loss_np[0], global_step)
+            writer.add_scalar("learning_rate",
+                              optim._learning_rate.step().numpy()[0],
+                              global_step)
+            optim.minimize(loss_var, grad_clip=clipper)
+            optim.clear_gradients()
+            print("global_step: {}\tloss: {:<8.6f}".format(global_step,
+                                                           loss_np[0]))
 
-                if global_step % snap_interval == 0:
-                    valid_model(model, valid_loader, writer, global_step,
-                                sample_rate)
+            if global_step % snap_interval == 0:
+                valid_model(model, valid_loader, writer, global_step,
+                            sample_rate)
 
-                if global_step % checkpoint_interval == 0:
-                    save_checkpoint(model, optim, checkpoint_dir, global_step)
+            if global_step % checkpoint_interval == 0:
+                io.save_latest_parameters(checkpoint_dir, global_step, model,
+                                          optim)
+                io.save_latest_checkpoint(checkpoint_dir, global_step)
 
-                global_step += 1
+            global_step += 1
diff --git a/examples/wavenet/utils.py b/examples/wavenet/utils.py
index bae186f..cb71acd 100644
--- a/examples/wavenet/utils.py
+++ b/examples/wavenet/utils.py
@@ -59,10 +59,3 @@ def eval_model(model, valid_loader, output_dir, sample_rate):
         wav_np = wav_var.numpy()[0]
         sf.write(path, wav_np, samplerate=sample_rate)
         print("generated {}".format(path))
-
-
-def save_checkpoint(model, optim, checkpoint_dir, global_step):
-    checkpoint_path = os.path.join(checkpoint_dir,
-                                   "step_{:09d}".format(global_step))
-    dg.save_dygraph(model.state_dict(), checkpoint_path)
-    dg.save_dygraph(optim.state_dict(), checkpoint_path)

From 23095bf99297102dbb12f0d4222fad649894c84d Mon Sep 17 00:00:00 2001
From: liuyibing01 <liuyibing01@baidu.com>
Date: Wed, 25 Mar 2020 14:48:54 +0000
Subject: [PATCH 07/11] Release waveflow & wavenet ckpts and audio samples

---
 README.md | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 81 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index b5f61fd..aacf6c3 100644
--- a/README.md
+++ b/README.md
@@ -76,29 +76,61 @@ Entries to the introduction, and the launch of training and synthsis for differe
 
 Parakeet also releases some well-trained parameters for the example models, which can be accessed in the following tables. Each column of these tables lists resources for one model, including the url link to the pre-trained model, the dataset that the model is trained on and the total training steps, and several synthesized audio samples based on the pre-trained model.
 
-- Vocoders
+#### Vocoders
+
+We provide the model checkpoints of WaveFlow with 64 and 128 residual channels, ClariNet and WaveNet.
 
 <div align="center">
 <table>
     <thead>
         <tr>
             <th  style="width: 250px">
-            WaveFlow
+            <a href="https://paddlespeech.bj.bcebos.com/Parakeet/waveflow_res64_ljspeech_ckpt_1.0.zip">WaveFlow (res. channels 64)</a>
             </th>
             <th  style="width: 250px">
-            <a href="https://paddlespeech.bj.bcebos.com/Parakeet/clarinet_ljspeech_ckpt_1.0.zip">ClariNet</a>
+            WaveFlow (res. channels 128)
             </th>
         </tr>
     </thead>
     <tbody>
         <tr>
-            <th>LJSpeech, 2M</th>
-            <th>LJSpeech, 500K</th>
+            <th>LJSpeech, 3020 K</th>
+            <th>LJSpeech </th>
         </tr>
         <tr>
+            <th>
+            <a href="https://paddlespeech.bj.bcebos.com/Parakeet/waveflow_res64_ljspeech_samples_1.0/step_3020k_sentence_0.wav">
+            <img src="images/audio_icon.png" width=250 /></a><br>
+            <a href="https://paddlespeech.bj.bcebos.com/Parakeet/waveflow_res64_ljspeech_samples_1.0/step_3020k_sentence_1.wav">
+            <img src="images/audio_icon.png" width=250 /></a><br>
+            <a href="https://paddlespeech.bj.bcebos.com/Parakeet/waveflow_res64_ljspeech_samples_1.0/step_3020k_sentence_2.wav">
+            <img src="images/audio_icon.png" width=250 /></a><br>
+            <a href="https://paddlespeech.bj.bcebos.com/Parakeet/waveflow_res64_ljspeech_samples_1.0/step_3020k_sentence_3.wav">
+            <img src="images/audio_icon.png" width=250 /></a><br>
+            <a href="https://paddlespeech.bj.bcebos.com/Parakeet/waveflow_res64_ljspeech_samples_1.0/step_3020k_sentence_4.wav">
+            <img src="images/audio_icon.png" width=250 /></a>
+            </th>
             <th>
             To be added soon
             </th>
+        </tr>
+    </tbody>
+    <thead>
+        <tr>
+            <th  style="width: 250px">
+            <a href="https://paddlespeech.bj.bcebos.com/Parakeet/clarinet_ljspeech_ckpt_1.0.zip">ClariNet</a>
+            </th>
+            <th  style="width: 250px">
+            <a href="https://paddlespeech.bj.bcebos.com/Parakeet/wavenet_ljspeech_ckpt_1.0.zip">WaveNet</a>
+            </th>
+        </tr>
+    </thead>
+    <tbody>
+        <tr>
+            <th>LJSpeech, 500 K</th>
+            <th>LJSpeech, 2450 K</th>
+        </tr>
+        <tr>
             <th>
             <a href="https://paddlespeech.bj.bcebos.com/Parakeet/clarinet_ljspeech_samples_1.0/step_500000_sentence_0.wav">
             <img src="images/audio_icon.png" width=250 /></a><br>
@@ -111,15 +143,57 @@ Parakeet also releases some well-trained parameters for the example models, whic
             <a href="https://paddlespeech.bj.bcebos.com/Parakeet/clarinet_ljspeech_samples_1.0/step_500000_sentence_4.wav">
             <img src="images/audio_icon.png" width=250 /></a>  
             </th>
+            <th>
+            <a href="https://paddlespeech.bj.bcebos.com/Parakeet/wavenet_ljspeech_samples_1.0/step_2450k_sentence_0.wav">
+            <img src="images/audio_icon.png" width=250 /></a><br>
+            <a href="https://paddlespeech.bj.bcebos.com/Parakeet/wavenet_ljspeech_samples_1.0/step_2450k_sentence_1.wav">
+            <img src="images/audio_icon.png" width=250 /></a><br>
+            <a href="https://paddlespeech.bj.bcebos.com/Parakeet/wavenet_ljspeech_samples_1.0/step_2450k_sentence_2.wav">
+            <img src="images/audio_icon.png" width=250 /></a><br>
+            <a href="https://paddlespeech.bj.bcebos.com/Parakeet/wavenet_ljspeech_samples_1.0/step_2450k_sentence_3.wav">
+            <img src="images/audio_icon.png" width=250 /></a><br>
+            <a href="https://paddlespeech.bj.bcebos.com/Parakeet/wavenet_ljspeech_samples_1.0/step_2450k_sentence_4.wav">
+            <img src="images/audio_icon.png" width=250 /></a>  
+            </th>
         </tr>
     </tbody>
 </table>
 </div>
 
 
-&nbsp;&nbsp;&nbsp;&nbsp;**Note:** The input mel spectrogams are from validation dataset, which are not seen during training.
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;**Note:** The input mel spectrograms are from the validation dataset, which are not seen during training.
 
-- TTS models
+#### TTS models
+
+<div align="center">
+<table>
+    <thead>
+        <tr>
+            <th  style="width: 250px">
+            Deep Voice 3
+            </th>
+            <th  style="width: 250px">
+            Transformer TTS
+            </th>
+        </tr>
+    </thead>
+    <tbody>
+        <tr>
+            <th>LJSpeech </th>
+            <th>LJSpeech </th>
+        </tr>
+        <tr>
+            <th style="height: 150px">
+            To be added soon
+            </th>
+            <th >
+            To be added soon
+            </th>
+        </tr>
+    </tbody>
+    <thead>
+</table>
+</div>
 
 Click each link to download, then you can get the compressed package which contains the pre-trained model and the `yaml` config describing how to train the model.
 

From c845fbd51d734cf6f47ad73cd233470dc3a01fc3 Mon Sep 17 00:00:00 2001
From: chenfeiyu <chenfeiyu@baidu.com>
Date: Thu, 26 Mar 2020 09:48:47 +0800
Subject: [PATCH 08/11] change interface for io.py

---
 parakeet/utils/io.py | 108 +++++++++++++++++++++++++------------------
 1 file changed, 63 insertions(+), 45 deletions(-)

diff --git a/parakeet/utils/io.py b/parakeet/utils/io.py
index e612400..e9e1240 100644
--- a/parakeet/utils/io.py
+++ b/parakeet/utils/io.py
@@ -20,6 +20,11 @@ import numpy as np
 import paddle.fluid.dygraph as dg
 
 
+def is_main_process():
+    local_rank = dg.parallel.Env().local_rank
+    return local_rank == 0
+
+
 def add_yaml_config_to_args(config):
     """ Add args in yaml config to the args parsed by argparse. The argument in 
         yaml config will be overwritten by the same argument in argparse if they 
@@ -41,7 +46,7 @@ def add_yaml_config_to_args(config):
     return config
 
 
-def load_latest_checkpoint(checkpoint_dir, rank=0):
+def _load_latest_checkpoint(checkpoint_dir):
     """Get the iteration number corresponding to the latest saved checkpoint
 
     Args:
@@ -52,26 +57,20 @@ def load_latest_checkpoint(checkpoint_dir, rank=0):
     Returns:
         int: the latest iteration number.
     """
-    checkpoint_path = os.path.join(checkpoint_dir, "checkpoint")
+    checkpoint_record = os.path.join(checkpoint_dir, "checkpoint")
     # Create checkpoint index file if not exist.
-    if (not os.path.isfile(checkpoint_path)) and rank == 0:
-        with open(checkpoint_path, "w") as handle:
-            handle.write("model_checkpoint_path: step-0")
-
-    # Make sure that other process waits until checkpoint file is created
-    # by process 0.
-    while not os.path.isfile(checkpoint_path):
-        time.sleep(1)
+    if (not os.path.isfile(checkpoint_record)):
+        return 0
 
     # Fetch the latest checkpoint index.
-    with open(checkpoint_path, "r") as handle:
+    with open(checkpoint_record, "r") as handle:
         latest_checkpoint = handle.readline().split()[-1]
         iteration = int(latest_checkpoint.split("-")[-1])
 
     return iteration
 
 
-def save_latest_checkpoint(checkpoint_dir, iteration):
+def _save_checkpoint(checkpoint_dir, iteration):
     """Save the iteration number of the latest model to be checkpointed.
 
     Args:
@@ -81,60 +80,76 @@ def save_latest_checkpoint(checkpoint_dir, iteration):
     Returns:
         None
     """
-    checkpoint_path = os.path.join(checkpoint_dir, "checkpoint")
+    checkpoint_record = os.path.join(checkpoint_dir, "checkpoint")
     # Update the latest checkpoint index.
-    with open(checkpoint_path, "w") as handle:
+    with open(checkpoint_record, "w") as handle:
         handle.write("model_checkpoint_path: step-{}".format(iteration))
 
 
-def load_parameters(checkpoint_dir,
-                    rank,
-                    model,
+def load_parameters(model,
                     optimizer=None,
+                    checkpoint_dir=None,
                     iteration=None,
-                    file_path=None,
+                    checkpoint_path=None,
                     dtype="float32"):
     """Load a specific model checkpoint from disk.
 
     Args:
-        checkpoint_dir (str): the directory where checkpoint is saved.
-        rank (int): the rank of the process in multi-process setting.
         model (obj): model to load parameters.
         optimizer (obj, optional): optimizer to load states if needed.
             Defaults to None.
+        checkpoint_dir (str, optional): the directory where checkpoint is saved.
         iteration (int, optional): if specified, load the specific checkpoint,
             if not specified, load the latest one. Defaults to None.
-        file_path (str, optional): if specified, load the checkpoint
-            stored in the file_path. Defaults to None.
+        checkpoint_path (str, optional): if specified, load the checkpoint
+            stored in the checkpoint_path. Defaults to None. 
         dtype (str, optional): precision of the model parameters.
             Defaults to float32.
 
     Returns:
-        None
+        iteration (int): number of iterations that the loaded checkpoint has 
+            been trained.
     """
-    if file_path is None:
-        if iteration is None:
-            iteration = load_latest_checkpoint(checkpoint_dir, rank)
-        if iteration == 0:
-            return
-        file_path = "{}/step-{}".format(checkpoint_dir, iteration)
+    if checkpoint_dir is not None and checkpoint_path is not None:
+        raise ValueError(
+            "Load from either from (checkpoint_dir and iteration) \n"
+            "or checkpoint_path. Do not pass both.")
+    if iteration is not None and checkpoint_dir is None:
+        raise ValueError(
+            "When iteration is specified, checkpoint_dir should not be None")
+
+    if checkpoint_dir is not None:
+        if iteration is None:
+            iteration = _load_latest_checkpoint(checkpoint_dir)
+        checkpoint_path = os.path.join(checkpoint_dir,
+                                       "step-{}".format(iteration))
+        if iteration == 0 and not os.path.exists(checkpoint_path):
+            # if step-0 exist, it is also loaded
+            return iteration
+    else:
+        # checkpoint is not None
+        iteration = int(os.path.basename(checkpoint_path).split("-")[-1])
+
+    local_rank = dg.parallel.Env().local_rank
+    model_dict, optimizer_dict = dg.load_dygraph(checkpoint_path)
+
+    # cast to desired data type
+    for k, v in model_dict.items():
+        model_dict[k] = v.astype(dtype)
 
-    model_dict, optimizer_dict = dg.load_dygraph(file_path)
-    if dtype == "float16":
-        for k, v in model_dict.items():
-            if "conv2d_transpose" in k:
-                model_dict[k] = v.astype("float32")
-            else:
-                model_dict[k] = v.astype(dtype)
     model.set_dict(model_dict)
-    print("[checkpoint] Rank {}: loaded model from {}".format(rank, file_path))
+    print("[checkpoint] Rank {}: loaded model from {}.pdparams".format(
+        local_rank, checkpoint_path))
+
     if optimizer and optimizer_dict:
         optimizer.set_dict(optimizer_dict)
-        print("[checkpoint] Rank {}: loaded optimizer state from {}".format(
-            rank, file_path))
+        print("[checkpoint] Rank {}: loaded optimizer state from {}.pdopt".
+              format(local_rank, checkpoint_path))
+
+    return iteration
 
 
-def save_latest_parameters(checkpoint_dir, iteration, model, optimizer=None):
+def save_parameters(checkpoint_dir, iteration, model, optimizer=None):
     """Checkpoint the latest trained model parameters.
 
     Args:
@@ -147,12 +162,15 @@ def save_latest_parameters(checkpoint_dir, iteration, model, optimizer=None):
     Returns:
         None
     """
-    file_path = "{}/step-{}".format(checkpoint_dir, iteration)
+    checkpoint_path = os.path.join(checkpoint_dir, "step-{}".format(iteration))
     model_dict = model.state_dict()
-    dg.save_dygraph(model_dict, file_path)
-    print("[checkpoint] Saved model to {}".format(file_path))
+    dg.save_dygraph(model_dict, checkpoint_path)
+    print("[checkpoint] Saved model to {}.pdparams".format(checkpoint_path))
 
     if optimizer:
         opt_dict = optimizer.state_dict()
-        dg.save_dygraph(opt_dict, file_path)
-        print("[checkpoint] Saved optimzier state to {}".format(file_path))
+        dg.save_dygraph(opt_dict, checkpoint_path)
+        print("[checkpoint] Saved optimzier state to {}.pdopt".format(
+            checkpoint_path))
+
+    _save_checkpoint(checkpoint_dir, iteration)

From fccbf6d797ee9d602a2344ac5df9307a64d16716 Mon Sep 17 00:00:00 2001
From: liuyibing01 <liuyibing01@baidu.com>
Date: Thu, 26 Mar 2020 06:45:22 +0000
Subject: [PATCH 09/11] Update README

---
 README.md                                          | 8 ++++----
 {parakeet/models => examples}/waveflow/data.py     | 0
 {parakeet/models => examples}/waveflow/waveflow.py | 0
 3 files changed, 4 insertions(+), 4 deletions(-)
 rename {parakeet/models => examples}/waveflow/data.py (100%)
 rename {parakeet/models => examples}/waveflow/waveflow.py (100%)

diff --git a/README.md b/README.md
index aacf6c3..a851192 100644
--- a/README.md
+++ b/README.md
@@ -74,7 +74,7 @@ Entries to the introduction, and the launch of training and synthsis for differe
 
 ## Pre-trained models and audio samples
 
-Parakeet also releases some well-trained parameters for the example models, which can be accessed in the following tables. Each column of these tables lists resources for one model, including the url link to the pre-trained model, the dataset that the model is trained on and the total training steps, and several synthesized audio samples based on the pre-trained model.
+Parakeet also releases some well-trained parameters for the example models, which can be accessed in the following tables. Each column of these tables lists resources for one model, including the url link to the pre-trained model, the dataset that the model is trained on, and synthesized audio samples based on the pre-trained model.
 
 #### Vocoders
 
@@ -94,7 +94,7 @@ We provide the model checkpoints of WaveFlow with 64 and 128 residual channels,
     </thead>
     <tbody>
         <tr>
-            <th>LJSpeech, 3020 K</th>
+            <th>LJSpeech </th>
             <th>LJSpeech </th>
         </tr>
         <tr>
@@ -127,8 +127,8 @@ We provide the model checkpoints of WaveFlow with 64 and 128 residual channels,
     </thead>
     <tbody>
         <tr>
-            <th>LJSpeech, 500 K</th>
-            <th>LJSpeech, 2450 K</th>
+            <th>LJSpeech </th>
+            <th>LJSpeech </th>
         </tr>
         <tr>
             <th>
diff --git a/parakeet/models/waveflow/data.py b/examples/waveflow/data.py
similarity index 100%
rename from parakeet/models/waveflow/data.py
rename to examples/waveflow/data.py
diff --git a/parakeet/models/waveflow/waveflow.py b/examples/waveflow/waveflow.py
similarity index 100%
rename from parakeet/models/waveflow/waveflow.py
rename to examples/waveflow/waveflow.py

From 618eb31ab4918af41e03be582f0ca4dec6046ad8 Mon Sep 17 00:00:00 2001
From: liuyibing01 <liuyibing01@baidu.com>
Date: Thu, 26 Mar 2020 09:27:22 +0000
Subject: [PATCH 10/11] Adapt the change in save & load

---
 examples/waveflow/README.md          | 10 +++----
 examples/waveflow/benchmark.py       |  2 +-
 examples/waveflow/synthesis.py       |  4 +--
 examples/waveflow/train.py           | 24 ++++------------
 examples/waveflow/waveflow.py        | 34 +++++++++++------------
 parakeet/models/waveflow/__init__.py |  2 +-
 parakeet/utils/io.py                 | 41 +++++++++++++---------------
 7 files changed, 49 insertions(+), 68 deletions(-)

diff --git a/examples/waveflow/README.md b/examples/waveflow/README.md
index 34e6908..16364f6 100644
--- a/examples/waveflow/README.md
+++ b/examples/waveflow/README.md
@@ -13,8 +13,8 @@ PaddlePaddle dynamic graph implementation of [WaveFlow: A Compact Flow-based Mod
 ├── synthesis.py                                     # script for speech synthesis
 ├── train.py                                         # script for model training
 ├── utils.py                                         # helper functions for e.g., model checkpointing
-├── parakeet/models/waveflow/data.py                 # dataset and dataloader settings for LJSpeech
-├── parakeet/models/waveflow/waveflow.py             # WaveFlow model high level APIs
+├── data.py                                          # dataset and dataloader settings for LJSpeech
+├── waveflow.py                                      # WaveFlow model high level APIs
 └── parakeet/models/waveflow/waveflow_modules.py     # WaveFlow model implementation
 ```
 
@@ -48,12 +48,12 @@ python -u train.py \
     --config=./configs/waveflow_ljspeech.yaml \
     --root=./data/LJSpeech-1.1 \
     --name=${ModelName} --batch_size=4 \
-    --parallel=false --use_gpu=true
+    --use_gpu=true
 ```
 
 #### Save and Load checkpoints
 
-Our model will save model parameters as checkpoints in `./runs/waveflow/${ModelName}/checkpoint/` every 10000 iterations by default.
+Our model will save model parameters as checkpoints in `./runs/waveflow/${ModelName}/checkpoint/` every 10000 iterations by default, where `${ModelName}` is a name of your choice that identifies a single experiment.
 The saved checkpoint will have the format of `step-${iteration_number}.pdparams` for model parameters and `step-${iteration_number}.pdopt` for optimizer parameters.
 
 There are three ways to load a checkpoint and resume training (take an example that you want to load a 500000-iteration checkpoint):
@@ -68,7 +68,7 @@ export CUDA_VISIBLE_DEVICES=0,1,2,3
 python -u -m paddle.distributed.launch train.py \
     --config=./configs/waveflow_ljspeech.yaml \
     --root=./data/LJSpeech-1.1 \
-    --name=${ModelName} --parallel=true --use_gpu=true
+    --name=${ModelName} --use_gpu=true
 ```
 
 Use `export CUDA_VISIBLE_DEVICES=0,1,2,3` to set the GPUs that you want to use to be visible. Then the `paddle.distributed.launch` module will use these visible GPUs to do data parallel training in multiprocessing mode.
diff --git a/examples/waveflow/benchmark.py b/examples/waveflow/benchmark.py
index 0581471..222e732 100644
--- a/examples/waveflow/benchmark.py
+++ b/examples/waveflow/benchmark.py
@@ -23,7 +23,7 @@ from paddle import fluid
 
 import utils
 from parakeet.utils import io
-from parakeet.models.waveflow import WaveFlow
+from waveflow import WaveFlow
 
 
 def add_options_to_parser(parser):
diff --git a/examples/waveflow/synthesis.py b/examples/waveflow/synthesis.py
index 5f3dd5a..15c4d3b 100644
--- a/examples/waveflow/synthesis.py
+++ b/examples/waveflow/synthesis.py
@@ -21,9 +21,9 @@ import numpy as np
 import paddle.fluid.dygraph as dg
 from paddle import fluid
 
-import utils
-from parakeet.models.waveflow import WaveFlow
 from parakeet.utils import io
+import utils
+from waveflow import WaveFlow
 
 
 def add_options_to_parser(parser):
diff --git a/examples/waveflow/train.py b/examples/waveflow/train.py
index 548c5da..a033369 100644
--- a/examples/waveflow/train.py
+++ b/examples/waveflow/train.py
@@ -26,7 +26,7 @@ from tensorboardX import SummaryWriter
 
 import utils
 from parakeet.utils import io
-from parakeet.models.waveflow import WaveFlow
+from waveflow import WaveFlow
 
 
 def add_options_to_parser(parser):
@@ -40,11 +40,6 @@ def add_options_to_parser(parser):
     parser.add_argument(
         '--root', type=str, help="root path of the LJSpeech dataset")
 
-    parser.add_argument(
-        '--parallel',
-        type=utils.str2bool,
-        default=True,
-        help="option to use data parallel training")
     parser.add_argument(
         '--use_gpu',
         type=utils.str2bool,
@@ -66,11 +61,11 @@ def add_options_to_parser(parser):
 
 def train(config):
     use_gpu = config.use_gpu
-    parallel = config.parallel if use_gpu else False
 
     # Get the rank of the current training process.
-    rank = dg.parallel.Env().local_rank if parallel else 0
-    nranks = dg.parallel.Env().nranks if parallel else 1
+    rank = dg.parallel.Env().local_rank
+    nranks = dg.parallel.Env().nranks
+    parallel = nranks > 1
 
     if rank == 0:
         # Print the whole config setting.
@@ -100,16 +95,7 @@ def train(config):
 
         # Build model.
         model = WaveFlow(config, checkpoint_dir, parallel, rank, nranks, tb)
-        model.build()
-
-        # Obtain the current iteration.
-        if config.checkpoint is None:
-            if config.iteration is None:
-                iteration = io.load_latest_checkpoint(checkpoint_dir, rank)
-            else:
-                iteration = config.iteration
-        else:
-            iteration = int(config.checkpoint.split('/')[-1].split('-')[-1])
+        iteration = model.build()
 
         while iteration < config.max_iterations:
             # Run one single training step.
diff --git a/examples/waveflow/waveflow.py b/examples/waveflow/waveflow.py
index faf2fb6..700116b 100644
--- a/examples/waveflow/waveflow.py
+++ b/examples/waveflow/waveflow.py
@@ -21,11 +21,11 @@ import paddle.fluid.dygraph as dg
 from paddle import fluid
 from scipy.io.wavfile import write
 
-import utils
 from parakeet.utils import io
 from parakeet.modules import weight_norm
-from .data import LJSpeech
-from .waveflow_modules import WaveFlowLoss, WaveFlowModule
+from parakeet.models.waveflow import WaveFlowLoss, WaveFlowModule
+from data import LJSpeech
+import utils
 
 
 class WaveFlow():
@@ -93,13 +93,12 @@ class WaveFlow():
                 parameter_list=waveflow.parameters())
 
             # Load parameters.
-            io.load_parameters(
-                self.checkpoint_dir,
-                self.rank,
-                waveflow,
-                optimizer,
+            iteration = io.load_parameters(
+                model=waveflow,
+                optimizer=optimizer,
+                checkpoint_dir=self.checkpoint_dir,
                 iteration=config.iteration,
-                file_path=config.checkpoint)
+                checkpoint_path=config.checkpoint)
             print("Rank {}: checkpoint loaded.".format(self.rank))
 
             # Data parallelism.
@@ -113,13 +112,11 @@ class WaveFlow():
 
         else:
             # Load parameters.
-            io.load_parameters(
-                self.checkpoint_dir,
-                self.rank,
-                waveflow,
+            iteration = io.load_parameters(
+                model=waveflow,
+                checkpoint_dir=self.checkpoint_dir,
                 iteration=config.iteration,
-                file_path=config.checkpoint,
-                dtype=self.dtype)
+                checkpoint_path=config.checkpoint)
             print("Rank {}: checkpoint loaded.".format(self.rank))
 
             for layer in waveflow.sublayers():
@@ -128,6 +125,8 @@ class WaveFlow():
 
             self.waveflow = waveflow
 
+        return iteration
+
     def train_step(self, iteration):
         """Train the model for one step.
 
@@ -293,6 +292,5 @@ class WaveFlow():
         Returns:
             None
         """
-        io.save_latest_parameters(self.checkpoint_dir, iteration,
-                                  self.waveflow, self.optimizer)
-        io.save_latest_checkpoint(self.checkpoint_dir, iteration)
+        io.save_parameters(self.checkpoint_dir, iteration, self.waveflow,
+                           self.optimizer)
diff --git a/parakeet/models/waveflow/__init__.py b/parakeet/models/waveflow/__init__.py
index 73a7914..b068b59 100644
--- a/parakeet/models/waveflow/__init__.py
+++ b/parakeet/models/waveflow/__init__.py
@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from parakeet.models.waveflow.waveflow import WaveFlow
+from parakeet.models.waveflow.waveflow_modules import WaveFlowLoss, WaveFlowModule
diff --git a/parakeet/utils/io.py b/parakeet/utils/io.py
index e9e1240..8c0e5aa 100644
--- a/parakeet/utils/io.py
+++ b/parakeet/utils/io.py
@@ -18,6 +18,7 @@ import time
 import ruamel.yaml
 import numpy as np
 import paddle.fluid.dygraph as dg
+from paddle.fluid.framework import convert_np_dtype_to_dtype_ as convert_np_dtype
 
 
 def is_main_process():
@@ -90,9 +91,8 @@ def load_parameters(model,
                     optimizer=None,
                     checkpoint_dir=None,
                     iteration=None,
-                    checkpoint_path=None,
-                    dtype="float32"):
-    """Load a specific model checkpoint from disk.
+                    checkpoint_path=None):
+    """Load a specific model checkpoint from disk. 
 
     Args:
         model (obj): model to load parameters.
@@ -102,40 +102,37 @@ def load_parameters(model,
         iteration (int, optional): if specified, load the specific checkpoint,
             if not specified, load the latest one. Defaults to None.
         checkpoint_path (str, optional): if specified, load the checkpoint
-            stored in the checkpoint_path. Defaults to None. 
-        dtype (str, optional): precision of the model parameters.
-            Defaults to float32.
+            stored in the checkpoint_path and the argument 'checkpoint_dir' will 
+            be ignored. Defaults to None. 
 
     Returns:
         iteration (int): number of iterations that the loaded checkpoint has 
             been trained.
     """
-    if checkpoint_dir is not None and checkpoint_path is not None:
-        raise ValueError(
-            "Load from either from (checkpoint_dir and iteration) \n"
-            "or checkpoint_path. Do not pass both.")
-    if iteration is not None and checkpoint_dir is None:
-        raise ValueError(
-            "When iteration is specified, checkpoint_dir should not be None")
-
-    if checkpoint_dir is not None:
+    if checkpoint_path is not None:
+        iteration = int(os.path.basename(checkpoint_path).split("-")[-1])
+    elif checkpoint_dir is not None:
         if iteration is None:
             iteration = _load_latest_checkpoint(checkpoint_dir)
-        checkpoint_path = os.path.join(checkpoint_dir,
-                                       "step-{}".format(iteration))
-        if iteration == 0 and not os.path.exists(checkpoint_path):
+        if iteration == 0:
             # if step-0 exist, it is also loaded
             return iteration
+        checkpoint_path = os.path.join(checkpoint_dir,
+                                       "step-{}".format(iteration))
     else:
-        # checkpoint is not None
-        iteration = int(os.path.basename(checkpoint_path).split("-")[-1])
+        raise ValueError(
+            "At least one of 'checkpoint_dir' and 'checkpoint_path' should be specified!"
+        )
 
     local_rank = dg.parallel.Env().local_rank
     model_dict, optimizer_dict = dg.load_dygraph(checkpoint_path)
 
-    # cast to desired data type
+    state_dict = model.state_dict()
+    # cast to desired data type, for mixed-precision training/inference.
     for k, v in model_dict.items():
-        model_dict[k] = v.astype(dtype)
+        if k in state_dict and convert_np_dtype(v.dtype) != state_dict[
+                k].dtype:
+            model_dict[k] = v.astype(state_dict[k].numpy().dtype)
 
     model.set_dict(model_dict)
     print("[checkpoint] Rank {}: loaded model from {}.pdparams".format(

From cf5f720b8f612de61c07168e8875c5e2220e5622 Mon Sep 17 00:00:00 2001
From: liuyibing01 <liuyibing01@baidu.com>
Date: Thu, 2 Apr 2020 07:58:34 +0000
Subject: [PATCH 11/11] Release ckpt & samples for waveflow res. channels 128

---
 README.md | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index a851192..e32219b 100644
--- a/README.md
+++ b/README.md
@@ -88,7 +88,7 @@ We provide the model checkpoints of WaveFlow with 64 and 128 residual channels,
             <a href="https://paddlespeech.bj.bcebos.com/Parakeet/waveflow_res64_ljspeech_ckpt_1.0.zip">WaveFlow (res. channels 64)</a>
             </th>
             <th  style="width: 250px">
-            WaveFlow (res. channels 128)
+            <a href="https://paddlespeech.bj.bcebos.com/Parakeet/waveflow_res128_ljspeech_ckpt_1.0.zip">WaveFlow (res. channels 128)</a>
             </th>
         </tr>
     </thead>
@@ -111,7 +111,16 @@ We provide the model checkpoints of WaveFlow with 64 and 128 residual channels,
             <img src="images/audio_icon.png" width=250 /></a>
             </th>
             <th>
-            To be added soon
+            <a href="https://paddlespeech.bj.bcebos.com/Parakeet/waveflow_res128_ljspeech_samples_1.0/step_2000k_sentence_0.wav">
+            <img src="images/audio_icon.png" width=250 /></a><br>
+            <a href="https://paddlespeech.bj.bcebos.com/Parakeet/waveflow_res128_ljspeech_samples_1.0/step_2000k_sentence_1.wav">
+            <img src="images/audio_icon.png" width=250 /></a><br>
+            <a href="https://paddlespeech.bj.bcebos.com/Parakeet/waveflow_res128_ljspeech_samples_1.0/step_2000k_sentence_2.wav">
+            <img src="images/audio_icon.png" width=250 /></a><br>
+            <a href="https://paddlespeech.bj.bcebos.com/Parakeet/waveflow_res128_ljspeech_samples_1.0/step_2000k_sentence_3.wav">
+            <img src="images/audio_icon.png" width=250 /></a><br>
+            <a href="https://paddlespeech.bj.bcebos.com/Parakeet/waveflow_res128_ljspeech_samples_1.0/step_2000k_sentence_4.wav">
+            <img src="images/audio_icon.png" width=250 /></a>
             </th>
         </tr>
     </tbody>