diff --git a/examples/waveflow/utils.py b/examples/waveflow/utils.py
index da9b4ba..b899073 100644
--- a/examples/waveflow/utils.py
+++ b/examples/waveflow/utils.py
@@ -109,6 +109,16 @@ def add_yaml_config(config):
 
 
 def load_latest_checkpoint(checkpoint_dir, rank=0):
+    """Get the iteration number corresponding to the latest saved checkpoint
+
+    Args:
+        checkpoint_dir (str): the directory where checkpoint is saved.
+        rank (int, optional): the rank of the process in multi-process setting.
+            Defaults to 0.
+
+    Returns:
+        int: the latest iteration number.
+    """
     checkpoint_path = os.path.join(checkpoint_dir, "checkpoint")
     # Create checkpoint index file if not exist.
     if (not os.path.isfile(checkpoint_path)) and rank == 0:
@@ -129,6 +139,15 @@ def load_latest_checkpoint(checkpoint_dir, rank=0):
 
 
 def save_latest_checkpoint(checkpoint_dir, iteration):
+    """Save the iteration number of the latest model to be checkpointed.
+
+    Args:
+        checkpoint_dir (str): the directory where checkpoint is saved.
+        iteration (int): the latest iteration number.
+
+    Returns:
+        None
+    """
     checkpoint_path = os.path.join(checkpoint_dir, "checkpoint")
     # Update the latest checkpoint index.
     with open(checkpoint_path, "w") as handle:
@@ -142,6 +161,24 @@ def load_parameters(checkpoint_dir,
                     iteration=None,
                     file_path=None,
                     dtype="float32"):
+    """Load a specific model checkpoint from disk.
+
+    Args:
+        checkpoint_dir (str): the directory where checkpoint is saved.
+        rank (int): the rank of the process in multi-process setting.
+        model (obj): model to load parameters.
+        optimizer (obj, optional): optimizer to load states if needed.
+            Defaults to None.
+        iteration (int, optional): if specified, load the specific checkpoint,
+            if not specified, load the latest one. Defaults to None.
+        file_path (str, optional): if specified, load the checkpoint
+            stored in the file_path. Defaults to None.
+        dtype (str, optional): precision of the model parameters.
+            Defaults to float32.
+
+    Returns:
+        None
+    """
     if file_path is None:
         if iteration is None:
             iteration = load_latest_checkpoint(checkpoint_dir, rank)
@@ -165,6 +202,18 @@ def load_parameters(checkpoint_dir,
 
 
 def save_latest_parameters(checkpoint_dir, iteration, model, optimizer=None):
+    """Checkpoint the latest trained model parameters.
+
+    Args:
+        checkpoint_dir (str): the directory where checkpoint is saved.
+        iteration (int): the latest iteration number.
+        model (obj): model to be checkpointed.
+        optimizer (obj, optional): optimizer to be checkpointed.
+            Defaults to None.
+
+    Returns:
+        None
+    """
     file_path = "{}/step-{}".format(checkpoint_dir, iteration)
     model_dict = model.state_dict()
     dg.save_dygraph(model_dict, file_path)
diff --git a/parakeet/models/waveflow/data.py b/parakeet/models/waveflow/data.py
index 83438f7..33e2ee5 100644
--- a/parakeet/models/waveflow/data.py
+++ b/parakeet/models/waveflow/data.py
@@ -80,6 +80,7 @@ class Subset(DatasetMixin):
             # whole audio for valid set
             pass
         else:
+            # Randomly crop segment_length from audios in the training set.
             # audio shape: [len]
             if audio.shape[0] >= segment_length:
                 max_audio_start = audio.shape[0] - segment_length
diff --git a/parakeet/models/waveflow/waveflow.py b/parakeet/models/waveflow/waveflow.py
index 4ef1411..101bb66 100644
--- a/parakeet/models/waveflow/waveflow.py
+++ b/parakeet/models/waveflow/waveflow.py
@@ -28,6 +28,25 @@ from .waveflow_modules import WaveFlowLoss, WaveFlowModule
 
 
 class WaveFlow():
+    """Wrapper class of WaveFlow model that supports multiple APIs.
+
+    This module provides APIs for model building, training, validation,
+    inference, benchmarking, and saving.
+
+    Args:
+        config (obj): config info.
+        checkpoint_dir (str): path for checkpointing.
+        parallel (bool, optional): whether use multiple GPUs for training.
+            Defaults to False.
+        rank (int, optional): the rank of the process in a multi-process
+            scenario. Defaults to 0.
+        nranks (int, optional): the total number of processes. Defaults to 1.
+        tb_logger (obj, optional): logger to visualize metrics.
+            Defaults to None.
+
+    Returns:
+        WaveFlow
+    """
     def __init__(self,
                  config,
                  checkpoint_dir,
@@ -44,6 +63,15 @@ class WaveFlow():
         self.dtype = "float16" if config.use_fp16 else "float32"
 
     def build(self, training=True):
+        """Initialize the model.
+
+        Args:
+            training (bool, optional): Whether the model is built for training or inference.
+                Defaults to True.
+
+        Returns:
+            None
+        """
         config = self.config
         dataset = LJSpeech(config, self.nranks, self.rank)
         self.trainloader = dataset.trainloader
@@ -99,6 +127,14 @@ class WaveFlow():
             self.waveflow = waveflow
 
     def train_step(self, iteration):
+        """Train the model for one step.
+
+        Args:
+            iteration (int): current iteration number.
+
+        Returns:
+            None
+        """
         self.waveflow.train()
 
         start_time = time.time()
@@ -135,6 +171,14 @@ class WaveFlow():
 
     @dg.no_grad
     def valid_step(self, iteration):
+        """Run the model on the validation dataset.
+
+        Args:
+            iteration (int): current iteration number.
+
+        Returns:
+            None
+        """
         self.waveflow.eval()
         tb = self.tb_logger
 
@@ -167,6 +211,14 @@ class WaveFlow():
 
     @dg.no_grad
     def infer(self, iteration):
+        """Run the model to synthesize audios.
+
+        Args:
+            iteration (int): iteration number of the loaded checkpoint.
+
+        Returns:
+            None
+        """
         self.waveflow.eval()
 
         config = self.config
@@ -203,6 +255,14 @@ class WaveFlow():
 
     @dg.no_grad
     def benchmark(self):
+        """Run the model to benchmark synthesis speed.
+
+        Args:
+            None
+
+        Returns:
+            None
+        """
         self.waveflow.eval()
 
         mels_list = [mels for _, mels in self.validloader()]
@@ -223,6 +283,14 @@ class WaveFlow():
             print("{} X real-time".format(audio_time / syn_time))
 
     def save(self, iteration):
+        """Save model checkpoint.
+
+        Args:
+            iteration (int): iteration number of the model to be saved.
+
+        Returns:
+            None
+        """
         utils.save_latest_parameters(self.checkpoint_dir, iteration,
                                      self.waveflow, self.optimizer)
         utils.save_latest_checkpoint(self.checkpoint_dir, iteration)
diff --git a/parakeet/models/waveflow/waveflow_modules.py b/parakeet/models/waveflow/waveflow_modules.py
index 46dfba7..f480cd9 100644
--- a/parakeet/models/waveflow/waveflow_modules.py
+++ b/parakeet/models/waveflow/waveflow_modules.py
@@ -293,6 +293,14 @@ class Flow(dg.Layer):
 
 
 class WaveFlowModule(dg.Layer):
+    """WaveFlow model implementation.
+
+    Args:
+        config (obj): model configuration parameters.
+
+    Returns:
+        WaveFlowModule
+    """
     def __init__(self, config):
         super(WaveFlowModule, self).__init__()
         self.n_flows = config.n_flows
@@ -321,6 +329,22 @@ class WaveFlowModule(dg.Layer):
             self.perms.append(perm)
 
     def forward(self, audio, mel):
+        """Training forward pass.
+
+        Use a conditioner to upsample mel spectrograms into hidden states.
+        These hidden states along with the audio are passed to a stack of Flow
+        modules to obtain the final latent variable z and a list of log scaling
+        variables, which are then passed to the WaveFlowLoss module to calculate
+        the negative log likelihood.
+
+        Args:
+            audio (obj): audio samples.
+            mel (obj): mel spectrograms.
+
+        Returns:
+            z (obj): latent variable.
+            log_s_list(list): list of log scaling variables.
+        """
         mel = self.conditioner(mel)
         assert mel.shape[2] >= audio.shape[1]
         # Prune out the tail of audio/mel so that time/n_group == 0.
@@ -361,6 +385,20 @@ class WaveFlowModule(dg.Layer):
         return z, log_s_list
 
     def synthesize(self, mel, sigma=1.0):
+        """Use model to synthesize waveform.
+
+        Use a conditioner to upsample mel spectrograms into hidden states.
+        These hidden states along with initial random gaussian latent variable
+        are passed to a stack of Flow modules to obtain the audio output.
+
+        Args:
+            mel (obj): mel spectrograms.
+            sigma (float, optional): standard deviation of the guassian latent
+                variable. Defaults to 1.0.
+
+        Returns:
+            audio (obj): synthesized audio.
+        """
         if self.dtype == "float16":
             mel = fluid.layers.cast(mel, self.dtype)
         mel = self.conditioner.infer(mel)