add license

lifuchen 2020-02-26 21:03:51 +08:00
parent f84d6bec91
commit 9d79699432
92 changed files with 3322 additions and 1455 deletions

View File

@@ -25,3 +25,11 @@
files: \.md$
- id: remove-tabs
files: \.md$
- repo: local
hooks:
- id: copyright_checker
name: copyright_checker
entry: python ./tools/copyright.hook
language: system
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
exclude: (?!.*third_party)^.*$ | (?!.*book)^.*$
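A quick sketch of how the negative lookahead in the `exclude` pattern above behaves (assuming pre-commit compiles it with Python's `re` module; the paths are illustrative):

```python
import re

# Each alternative matches a whole path only when the named substring is
# absent: the negative lookahead (?!.*third_party) fails as soon as
# "third_party" occurs anywhere in the path.
no_third_party = re.compile(r"(?!.*third_party)^.*$")

print(bool(no_third_party.match("parakeet/audio/audio.py")))  # True
print(bool(no_third_party.match("third_party/foo/bar.cc")))   # False
```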

View File

@@ -112,4 +112,3 @@ example script:
```bash
python synthesis.py --config=./ljspeech.yaml --device=0 experiment/checkpoints/model_step_005000000 sentences.txt generated
```

View File

@@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import csv
from pathlib import Path
@@ -79,10 +93,11 @@ class Transform(object):
y = signal.lfilter([1., -self.preemphasis], [1.], wav)
# STFT
D = librosa.stft(y=y,
n_fft=self.n_fft,
win_length=self.win_length,
hop_length=self.hop_length)
D = librosa.stft(
y=y,
n_fft=self.n_fft,
win_length=self.win_length,
hop_length=self.hop_length)
S = np.abs(D)
# to db and normalize to 0-1
@@ -96,11 +111,8 @@ class Transform(object):
# mel scale and to db and normalize to 0-1,
# CAUTION: pass linear scale S, not dbscaled S
S_mel = librosa.feature.melspectrogram(S=S,
n_mels=self.n_mels,
fmin=self.fmin,
fmax=self.fmax,
power=1.)
S_mel = librosa.feature.melspectrogram(
S=S, n_mels=self.n_mels, fmin=self.fmin, fmax=self.fmax, power=1.)
S_mel = 20 * np.log10(np.maximum(amplitude_min,
S_mel)) - self.ref_level_db
S_mel_norm = (S_mel - self.min_level_db) / (-self.min_level_db)
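A minimal sketch of the "to db and normalize to 0-1" step above; `min_level_db = -100` and `ref_level_db = 20` are illustrative assumptions, not values read from this repo's configs:

```python
import numpy as np

def normalize_db(S, min_level_db=-100.0, ref_level_db=20.0, amplitude_min=1e-5):
    # amplitude -> dB relative to ref_level_db, then map [min_level_db, 0] to [0, 1]
    S_db = 20 * np.log10(np.maximum(amplitude_min, S)) - ref_level_db
    return (S_db - min_level_db) / (-min_level_db)

S = np.array([1.0, 0.1, 1e-6])
print(normalize_db(S))  # [ 0.8  0.6 -0.2]; the amplitude_min floor caps how
                        # negative the last value can get before any clipping
```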
@@ -148,20 +160,18 @@ class DataCollector(object):
(mix_grapheme_phonemes, text_length, speaker_id, S_norm,
S_mel_norm, num_frames) = example
text_sequences.append(
np.pad(mix_grapheme_phonemes,
(0, max_text_length - text_length)))
np.pad(mix_grapheme_phonemes, (0, max_text_length - text_length
)))
lin_specs.append(
np.pad(S_norm,
((0, 0), (self._pad_begin,
max_frames - self._pad_begin - num_frames))))
np.pad(S_norm, ((0, 0), (self._pad_begin, max_frames -
self._pad_begin - num_frames))))
mel_specs.append(
np.pad(S_mel_norm,
((0, 0), (self._pad_begin,
max_frames - self._pad_begin - num_frames))))
np.pad(S_mel_norm, ((0, 0), (self._pad_begin, max_frames -
self._pad_begin - num_frames))))
done_flags.append(
np.pad(np.zeros((int(np.ceil(num_frames // self._factor)), )),
(0, max_decoder_length -
int(np.ceil(num_frames // self._factor))),
(0, max_decoder_length - int(
np.ceil(num_frames // self._factor))),
constant_values=1))
text_sequences = np.array(text_sequences).astype(np.int64)
lin_specs = np.transpose(np.array(lin_specs),
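A shape-only sketch of the padding convention `DataCollector` uses above: `_pad_begin` zero frames on the left, then zeros on the right up to `max_frames` (sizes illustrative):

```python
import numpy as np

S = np.ones((80, 7), np.float32)  # toy (n_mels, num_frames) spectrogram
pad_begin, max_frames = 2, 12
padded = np.pad(S, ((0, 0), (pad_begin, max_frames - pad_begin - 7)))
print(padded.shape)  # (80, 12): 2 leading and 3 trailing zero frames
```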

View File

@@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import argparse
import ruamel.yaml
@@ -22,11 +36,8 @@ if __name__ == "__main__":
parser.add_argument("checkpoint", type=str, help="checkpoint to load.")
parser.add_argument("text", type=str, help="text file to synthesize")
parser.add_argument("output_path", type=str, help="path to save results")
parser.add_argument("-g",
"--device",
type=int,
default=-1,
help="device to use")
parser.add_argument(
"-g", "--device", type=int, default=-1, help="device to use")
args = parser.parse_args()
with open(args.config, 'rt') as f:
@@ -76,15 +87,14 @@ if __name__ == "__main__":
window_ahead = model_config["window_ahead"]
key_projection = model_config["key_projection"]
value_projection = model_config["value_projection"]
dv3 = make_model(n_speakers, speaker_dim, speaker_embed_std, embed_dim,
padding_idx, embedding_std, max_positions, n_vocab,
freeze_embedding, filter_size, encoder_channels,
n_mels, decoder_channels, r,
trainable_positional_encodings, use_memory_mask,
query_position_rate, key_position_rate,
window_backward, window_ahead, key_projection,
value_projection, downsample_factor, linear_dim,
use_decoder_states, converter_channels, dropout)
dv3 = make_model(
n_speakers, speaker_dim, speaker_embed_std, embed_dim, padding_idx,
embedding_std, max_positions, n_vocab, freeze_embedding,
filter_size, encoder_channels, n_mels, decoder_channels, r,
trainable_positional_encodings, use_memory_mask,
query_position_rate, key_position_rate, window_backward,
window_ahead, key_projection, value_projection, downsample_factor,
linear_dim, use_decoder_states, converter_channels, dropout)
summary(dv3)
state, _ = dg.load_dygraph(args.checkpoint)

View File

@@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import argparse
import ruamel.yaml

View File

@@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
from matplotlib import cm
@@ -28,8 +42,9 @@ def make_model(n_speakers, speaker_dim, speaker_embed_std, embed_dim,
converter_channels, dropout):
"""just a simple function to create a deepvoice 3 model"""
if n_speakers > 1:
spe = dg.Embedding((n_speakers, speaker_dim),
param_attr=I.Normal(scale=speaker_embed_std))
spe = dg.Embedding(
(n_speakers, speaker_dim),
param_attr=I.Normal(scale=speaker_embed_std))
else:
spe = None
@@ -45,17 +60,17 @@ def make_model(n_speakers, speaker_dim, speaker_embed_std, embed_dim,
ConvSpec(h, k, 9),
ConvSpec(h, k, 27),
ConvSpec(h, k, 1),
ConvSpec(h, k, 3),
)
enc = Encoder(n_vocab,
embed_dim,
n_speakers,
speaker_dim,
padding_idx=None,
embedding_weight_std=embedding_std,
convolutions=encoder_convolutions,
max_positions=max_positions,
dropout=dropout)
ConvSpec(h, k, 3), )
enc = Encoder(
n_vocab,
embed_dim,
n_speakers,
speaker_dim,
padding_idx=None,
embedding_weight_std=embedding_std,
convolutions=encoder_convolutions,
max_positions=max_positions,
dropout=dropout)
if freeze_embedding:
freeze(enc.embed)
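If `ConvSpec(h, k, d)` is read as (output channels, kernel size, dilation), an assumption suggested by the 1, 3, 9, 27 pattern above, then stacking these dilations grows the receptive field roughly exponentially. A sketch of the standard receptive-field arithmetic for stride-1 convolutions:

```python
def receptive_field(dilations, k=5):
    # each stride-1 conv adds (k - 1) * dilation samples of context
    return 1 + sum((k - 1) * d for d in dilations)

print(receptive_field([1, 3, 9, 27, 1, 3]))  # 177
```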
@@ -66,28 +81,28 @@ def make_model(n_speakers, speaker_dim, speaker_embed_std, embed_dim,
ConvSpec(h, k, 3),
ConvSpec(h, k, 9),
ConvSpec(h, k, 27),
ConvSpec(h, k, 1),
)
ConvSpec(h, k, 1), )
attention = [True, False, False, False, True]
force_monotonic_attention = [True, False, False, False, True]
dec = Decoder(n_speakers,
speaker_dim,
embed_dim,
mel_dim,
r=r,
max_positions=max_positions,
padding_idx=padding_idx,
preattention=prenet_convolutions,
convolutions=attentive_convolutions,
attention=attention,
dropout=dropout,
use_memory_mask=use_memory_mask,
force_monotonic_attention=force_monotonic_attention,
query_position_rate=query_position_rate,
key_position_rate=key_position_rate,
window_range=WindowRange(window_behind, window_ahead),
key_projection=key_projection,
value_projection=value_projection)
dec = Decoder(
n_speakers,
speaker_dim,
embed_dim,
mel_dim,
r=r,
max_positions=max_positions,
padding_idx=padding_idx,
preattention=prenet_convolutions,
convolutions=attentive_convolutions,
attention=attention,
dropout=dropout,
use_memory_mask=use_memory_mask,
force_monotonic_attention=force_monotonic_attention,
query_position_rate=query_position_rate,
key_position_rate=key_position_rate,
window_range=WindowRange(window_behind, window_ahead),
key_projection=key_projection,
value_projection=value_projection)
if not trainable_positional_encodings:
freeze(dec.embed_keys_positions)
freeze(dec.embed_query_positions)
@@ -97,15 +112,15 @@ def make_model(n_speakers, speaker_dim, speaker_embed_std, embed_dim,
ConvSpec(h, k, 1),
ConvSpec(h, k, 3),
ConvSpec(2 * h, k, 1),
ConvSpec(2 * h, k, 3),
)
cvt = Converter(n_speakers,
speaker_dim,
dec.state_dim if use_decoder_states else mel_dim,
linear_dim,
time_upsampling=downsample_factor,
convolutions=postnet_convolutions,
dropout=dropout)
ConvSpec(2 * h, k, 3), )
cvt = Converter(
n_speakers,
speaker_dim,
dec.state_dim if use_decoder_states else mel_dim,
linear_dim,
time_upsampling=downsample_factor,
convolutions=postnet_convolutions,
dropout=dropout)
dv3 = DeepVoice3(enc, dec, cvt, spe, use_decoder_states)
return dv3
@@ -115,8 +130,10 @@ def eval_model(model, text, replace_pronounciation_prob, min_level_db,
ref_level_db, power, n_iter, win_length, hop_length,
preemphasis):
"""generate waveform from text using a deepvoice 3 model"""
text = np.array(en.text_to_sequence(text, p=replace_pronounciation_prob),
dtype=np.int64)
text = np.array(
en.text_to_sequence(
text, p=replace_pronounciation_prob),
dtype=np.int64)
length = len(text)
print("text sequence's length: {}".format(length))
text_positions = np.arange(1, 1 + length)
@@ -145,10 +162,11 @@ def spec_to_waveform(spec, min_level_db, ref_level_db, power, n_iter,
"""
denormalized = np.clip(spec, 0, 1) * (-min_level_db) + min_level_db
lin_scaled = np.exp((denormalized + ref_level_db) / 20 * np.log(10))
wav = librosa.griffinlim(lin_scaled**power,
n_iter=n_iter,
hop_length=hop_length,
win_length=win_length)
wav = librosa.griffinlim(
lin_scaled**power,
n_iter=n_iter,
hop_length=hop_length,
win_length=win_length)
if preemphasis > 0:
wav = signal.lfilter([1.], [1., -preemphasis], wav)
return wav
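The `np.exp((denormalized + ref_level_db) / 20 * np.log(10))` expression above is just the inverse of the 20·log10 dB mapping, since exp(x/20 · ln 10) = 10^(x/20). A one-line check:

```python
import numpy as np

x = -37.5  # an arbitrary dB value
assert np.isclose(np.exp(x / 20 * np.log(10)), 10 ** (x / 20))
```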
@@ -225,28 +243,30 @@ def save_state(save_dir,
plt.colorbar()
plt.title("mel_input")
plt.savefig(
os.path.join(path,
"target_mel_spec_step{:09d}.png".format(global_step)))
os.path.join(path, "target_mel_spec_step{:09d}.png".format(
global_step)))
plt.close()
writer.add_image("target/mel_spec",
cm.viridis(mel_input),
global_step,
dataformats="HWC")
writer.add_image(
"target/mel_spec",
cm.viridis(mel_input),
global_step,
dataformats="HWC")
plt.figure(figsize=(10, 3))
display.specshow(mel_output)
plt.colorbar()
plt.title("mel_output")
plt.savefig(
os.path.join(
path, "predicted_mel_spec_step{:09d}.png".format(global_step)))
os.path.join(path, "predicted_mel_spec_step{:09d}.png".format(
global_step)))
plt.close()
writer.add_image("predicted/mel_spec",
cm.viridis(mel_output),
global_step,
dataformats="HWC")
writer.add_image(
"predicted/mel_spec",
cm.viridis(mel_output),
global_step,
dataformats="HWC")
if lin_input is not None and lin_output is not None:
lin_input = lin_input[0].numpy().T
@@ -258,28 +278,30 @@ def save_state(save_dir,
plt.colorbar()
plt.title("mel_input")
plt.savefig(
os.path.join(path,
"target_lin_spec_step{:09d}.png".format(global_step)))
os.path.join(path, "target_lin_spec_step{:09d}.png".format(
global_step)))
plt.close()
writer.add_image("target/lin_spec",
cm.viridis(lin_input),
global_step,
dataformats="HWC")
writer.add_image(
"target/lin_spec",
cm.viridis(lin_input),
global_step,
dataformats="HWC")
plt.figure(figsize=(10, 3))
display.specshow(lin_output)
plt.colorbar()
plt.title("mel_input")
plt.savefig(
os.path.join(
path, "predicted_lin_spec_step{:09d}.png".format(global_step)))
os.path.join(path, "predicted_lin_spec_step{:09d}.png".format(
global_step)))
plt.close()
writer.add_image("predicted/lin_spec",
cm.viridis(lin_output),
global_step,
dataformats="HWC")
writer.add_image(
"predicted/lin_spec",
cm.viridis(lin_output),
global_step,
dataformats="HWC")
if alignments is not None and len(alignments.shape) == 4:
path = os.path.join(save_dir, "alignments")
@@ -290,10 +312,11 @@ def save_state(save_dir,
"train_attn_layer_{}_step_{}.png".format(idx, global_step))
plot_alignment(attn_layer, save_path)
writer.add_image("train_attn/layer_{}".format(idx),
cm.viridis(attn_layer),
global_step,
dataformats="HWC")
writer.add_image(
"train_attn/layer_{}".format(idx),
cm.viridis(attn_layer),
global_step,
dataformats="HWC")
if lin_output is not None:
wav = spec_to_waveform(lin_output, min_level_db, ref_level_db, power,
@@ -302,7 +325,5 @@ def save_state(save_dir,
save_path = os.path.join(
path, "train_sample_step_{:09d}.wav".format(global_step))
sf.write(save_path, wav, sample_rate)
writer.add_audio("train_sample",
wav,
global_step,
sample_rate=sample_rate)
writer.add_audio(
"train_sample", wav, global_step, sample_rate=sample_rate)

View File

@@ -1,36 +1,90 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
def add_config_options_to_parser(parser):
parser.add_argument('--config_path', type=str, default='config/fastspeech.yaml',
parser.add_argument(
'--config_path',
type=str,
default='config/fastspeech.yaml',
help="the yaml config file path.")
parser.add_argument('--batch_size', type=int, default=32,
help="batch size for training.")
parser.add_argument('--epochs', type=int, default=10000,
parser.add_argument(
'--batch_size', type=int, default=32, help="batch size for training.")
parser.add_argument(
'--epochs',
type=int,
default=10000,
help="the number of epoch for training.")
parser.add_argument('--lr', type=float, default=0.001,
parser.add_argument(
'--lr',
type=float,
default=0.001,
help="the learning rate for training.")
parser.add_argument('--save_step', type=int, default=500,
parser.add_argument(
'--save_step',
type=int,
default=500,
help="checkpointing interval during training.")
parser.add_argument('--fastspeech_step', type=int, default=70000,
parser.add_argument(
'--fastspeech_step',
type=int,
default=70000,
help="Global step to restore checkpoint of fastspeech.")
parser.add_argument('--use_gpu', type=int, default=1,
parser.add_argument(
'--use_gpu',
type=int,
default=1,
help="use gpu or not during training.")
parser.add_argument('--use_data_parallel', type=int, default=0,
parser.add_argument(
'--use_data_parallel',
type=int,
default=0,
help="use data parallel or not during training.")
parser.add_argument('--data_path', type=str, default='./dataset/LJSpeech-1.1',
parser.add_argument(
'--data_path',
type=str,
default='./dataset/LJSpeech-1.1',
help="the path of dataset.")
parser.add_argument('--checkpoint_path', type=str, default=None,
parser.add_argument(
'--checkpoint_path',
type=str,
default=None,
help="the path to load checkpoint or pretrain model.")
parser.add_argument('--save_path', type=str, default='./checkpoint',
parser.add_argument(
'--save_path',
type=str,
default='./checkpoint',
help="the path to save checkpoint.")
parser.add_argument('--log_dir', type=str, default='./log',
parser.add_argument(
'--log_dir',
type=str,
default='./log',
help="the directory to save tensorboard log.")
parser.add_argument('--sample_path', type=str, default='./sample',
parser.add_argument(
'--sample_path',
type=str,
default='./sample',
help="the directory to save audio sample in synthesis.")
parser.add_argument('--transtts_path', type=str, default='./log',
parser.add_argument(
'--transtts_path',
type=str,
default='./log',
help="the directory to load pretrain transformerTTS model.")
parser.add_argument('--transformer_step', type=int, default=160000,
parser.add_argument(
'--transformer_step',
type=int,
default=160000,
help="the step to load transformerTTS model.")

View File

@@ -1,3 +1,16 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from tensorboardX import SummaryWriter
from collections import OrderedDict
@@ -12,6 +25,7 @@ from parakeet.g2p.en import text_to_sequence
from parakeet import audio
from parakeet.models.fastspeech.fastspeech import FastSpeech
def load_checkpoint(step, model_path):
model_dict, _ = fluid.dygraph.load_dygraph(os.path.join(model_path, step))
new_state_dict = OrderedDict()
@@ -22,13 +36,14 @@ def load_checkpoint(step, model_path):
new_state_dict[param] = model_dict[param]
return new_state_dict
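The key renaming in `load_checkpoint` above strips the `_layers.` prefix that Paddle's `DataParallel` wrapper adds to parameter names, so a checkpoint saved from a wrapped model can be loaded into a plain one. A toy sketch of the same dict transformation:

```python
saved = {"_layers.encoder.w_0": 1, "decoder.b_0": 2}  # illustrative keys
cleaned = {k[len("_layers."):] if k.startswith("_layers.") else k: v
           for k, v in saved.items()}
print(cleaned)  # {'encoder.w_0': 1, 'decoder.b_0': 2}
```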
def synthesis(text_input, args):
place = (fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace())
# tensorboard
if not os.path.exists(args.log_dir):
os.mkdir(args.log_dir)
path = os.path.join(args.log_dir,'synthesis')
os.mkdir(args.log_dir)
path = os.path.join(args.log_dir, 'synthesis')
with open(args.config_path) as f:
cfg = yaml.load(f, Loader=yaml.Loader)
@@ -37,15 +52,19 @@ def synthesis(text_input, args):
with dg.guard(place):
model = FastSpeech(cfg)
model.set_dict(load_checkpoint(str(args.fastspeech_step), os.path.join(args.checkpoint_path, "fastspeech")))
model.set_dict(
load_checkpoint(
str(args.fastspeech_step),
os.path.join(args.checkpoint_path, "fastspeech")))
model.eval()
text = np.asarray(text_to_sequence(text_input))
text = fluid.layers.unsqueeze(dg.to_variable(text),[0])
pos_text = np.arange(1, text.shape[1]+1)
pos_text = fluid.layers.unsqueeze(dg.to_variable(pos_text),[0])
text = fluid.layers.unsqueeze(dg.to_variable(text), [0])
pos_text = np.arange(1, text.shape[1] + 1)
pos_text = fluid.layers.unsqueeze(dg.to_variable(pos_text), [0])
mel_output, mel_output_postnet = model(text, pos_text, alpha=args.alpha)
mel_output, mel_output_postnet = model(
text, pos_text, alpha=args.alpha)
_ljspeech_processor = audio.AudioProcessor(
sample_rate=cfg['audio']['sr'],
@@ -53,8 +72,8 @@ def synthesis(text_input, args):
min_level_db=cfg['audio']['min_level_db'],
ref_level_db=cfg['audio']['ref_level_db'],
n_fft=cfg['audio']['n_fft'],
win_length= cfg['audio']['win_length'],
hop_length= cfg['audio']['hop_length'],
win_length=cfg['audio']['win_length'],
hop_length=cfg['audio']['hop_length'],
power=cfg['audio']['power'],
preemphasis=cfg['audio']['preemphasis'],
signal_norm=True,
@@ -67,12 +86,15 @@ def synthesis(text_input, args):
do_trim_silence=False,
sound_norm=False)
mel_output_postnet = fluid.layers.transpose(fluid.layers.squeeze(mel_output_postnet,[0]), [1,0])
wav = _ljspeech_processor.inv_melspectrogram(mel_output_postnet.numpy())
mel_output_postnet = fluid.layers.transpose(
fluid.layers.squeeze(mel_output_postnet, [0]), [1, 0])
wav = _ljspeech_processor.inv_melspectrogram(mel_output_postnet.numpy(
))
writer.add_audio(text_input, wav, 0, cfg['audio']['sr'])
print("Synthesis completed !!!")
writer.close()
if __name__ == '__main__':
parser = argparse.ArgumentParser(description="Train Fastspeech model")
add_config_options_to_parser(parser)

View File

@@ -1,3 +1,16 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import argparse
import os
@@ -20,8 +33,10 @@ import sys
sys.path.append("../transformer_tts")
from data import LJSpeechLoader
def load_checkpoint(step, model_path):
model_dict, opti_dict = fluid.dygraph.load_dygraph(os.path.join(model_path, step))
model_dict, opti_dict = fluid.dygraph.load_dygraph(
os.path.join(model_path, step))
new_state_dict = OrderedDict()
for param in model_dict:
if param.startswith('_layers.'):
@@ -30,6 +45,7 @@ def load_checkpoint(step, model_path):
new_state_dict[param] = model_dict[param]
return new_state_dict, opti_dict
def main(args):
local_rank = dg.parallel.Env().local_rank if args.use_data_parallel else 0
nranks = dg.parallel.Env().nranks if args.use_data_parallel else 1
@@ -43,26 +59,33 @@ def main(args):
if args.use_gpu else fluid.CPUPlace())
if not os.path.exists(args.log_dir):
os.mkdir(args.log_dir)
path = os.path.join(args.log_dir,'fastspeech')
os.mkdir(args.log_dir)
path = os.path.join(args.log_dir, 'fastspeech')
writer = SummaryWriter(path) if local_rank == 0 else None
with dg.guard(place):
with fluid.unique_name.guard():
transformerTTS = TransformerTTS(cfg)
model_dict, _ = load_checkpoint(str(args.transformer_step), os.path.join(args.transtts_path, "transformer"))
model_dict, _ = load_checkpoint(
str(args.transformer_step),
os.path.join(args.transtts_path, "transformer"))
transformerTTS.set_dict(model_dict)
transformerTTS.eval()
model = FastSpeech(cfg)
model.train()
optimizer = fluid.optimizer.AdamOptimizer(learning_rate=dg.NoamDecay(1/(cfg['warm_up_step'] *( args.lr ** 2)), cfg['warm_up_step']),
parameter_list=model.parameters())
reader = LJSpeechLoader(cfg, args, nranks, local_rank, shuffle=True).reader()
optimizer = fluid.optimizer.AdamOptimizer(
learning_rate=dg.NoamDecay(1 / (
cfg['warm_up_step'] * (args.lr**2)), cfg['warm_up_step']),
parameter_list=model.parameters())
reader = LJSpeechLoader(
cfg, args, nranks, local_rank, shuffle=True).reader()
if args.checkpoint_path is not None:
model_dict, opti_dict = load_checkpoint(str(args.fastspeech_step), os.path.join(args.checkpoint_path, "fastspeech"))
model_dict, opti_dict = load_checkpoint(
str(args.fastspeech_step),
os.path.join(args.checkpoint_path, "fastspeech"))
model.set_dict(model_dict)
optimizer.set_dict(opti_dict)
global_step = args.fastspeech_step
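Why the learning rate is `NoamDecay(1 / (warm_up_step * lr**2), warm_up_step)`: assuming Paddle's `NoamDecay(d_model, warmup)` implements the standard Noam schedule `d_model**-0.5 * min(step**-0.5, step * warmup**-1.5)`, choosing `d_model = 1 / (warmup * lr**2)` makes the peak rate at `step == warmup` come out to exactly `lr`:

```python
import numpy as np

def noam(step, lr=0.001, warmup=4000):
    d_model = 1 / (warmup * lr ** 2)
    return d_model ** -0.5 * min(step ** -0.5, step * warmup ** -1.5)

print(np.isclose(noam(4000), 0.001))  # True: peak equals the configured lr
```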
@@ -76,31 +99,42 @@ def main(args):
pbar = tqdm(reader)
for i, data in enumerate(pbar):
pbar.set_description('Processing at epoch %d'%epoch)
pbar.set_description('Processing at epoch %d' % epoch)
character, mel, mel_input, pos_text, pos_mel, text_length, mel_lens = data
_, _, attn_probs, _, _, _ = transformerTTS(character, mel_input, pos_text, pos_mel)
alignment = dg.to_variable(get_alignment(attn_probs, mel_lens, cfg['transformer_head'])).astype(np.float32)
_, _, attn_probs, _, _, _ = transformerTTS(
character, mel_input, pos_text, pos_mel)
alignment = dg.to_variable(
get_alignment(attn_probs, mel_lens, cfg[
'transformer_head'])).astype(np.float32)
global_step += 1
#Forward
result= model(character,
pos_text,
mel_pos=pos_mel,
length_target=alignment)
result = model(
character,
pos_text,
mel_pos=pos_mel,
length_target=alignment)
mel_output, mel_output_postnet, duration_predictor_output, _, _ = result
mel_loss = layers.mse_loss(mel_output, mel)
mel_postnet_loss = layers.mse_loss(mel_output_postnet, mel)
duration_loss = layers.mean(layers.abs(layers.elementwise_sub(duration_predictor_output, alignment)))
duration_loss = layers.mean(
layers.abs(
layers.elementwise_sub(duration_predictor_output,
alignment)))
total_loss = mel_loss + mel_postnet_loss + duration_loss
if local_rank==0:
writer.add_scalar('mel_loss', mel_loss.numpy(), global_step)
writer.add_scalar('post_mel_loss', mel_postnet_loss.numpy(), global_step)
writer.add_scalar('duration_loss', duration_loss.numpy(), global_step)
writer.add_scalar('learning_rate', optimizer._learning_rate.step().numpy(), global_step)
if local_rank == 0:
writer.add_scalar('mel_loss',
mel_loss.numpy(), global_step)
writer.add_scalar('post_mel_loss',
mel_postnet_loss.numpy(), global_step)
writer.add_scalar('duration_loss',
duration_loss.numpy(), global_step)
writer.add_scalar('learning_rate',
optimizer._learning_rate.step().numpy(),
global_step)
if args.use_data_parallel:
total_loss = model.scale_loss(total_loss)
@@ -108,21 +142,25 @@ def main(args):
model.apply_collective_grads()
else:
total_loss.backward()
optimizer.minimize(total_loss, grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(cfg['grad_clip_thresh']))
optimizer.minimize(
total_loss,
grad_clip=fluid.dygraph_grad_clip.GradClipByGlobalNorm(cfg[
'grad_clip_thresh']))
model.clear_gradients()
# save checkpoint
if local_rank==0 and global_step % args.save_step == 0:
# save checkpoint
if local_rank == 0 and global_step % args.save_step == 0:
if not os.path.exists(args.save_path):
os.mkdir(args.save_path)
save_path = os.path.join(args.save_path,'fastspeech/%d' % global_step)
save_path = os.path.join(args.save_path,
'fastspeech/%d' % global_step)
dg.save_dygraph(model.state_dict(), save_path)
dg.save_dygraph(optimizer.state_dict(), save_path)
if local_rank==0:
if local_rank == 0:
writer.close()
if __name__ =='__main__':
if __name__ == '__main__':
parser = argparse.ArgumentParser(description="Train Fastspeech model")
add_config_options_to_parser(parser)
args = parser.parse_args()

View File

@@ -1,3 +1,16 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pathlib import Path
import numpy as np
import pandas as pd
@@ -12,22 +25,42 @@ from parakeet.data.datacargo import DataCargo
from parakeet.data.batch import TextIDBatcher, SpecBatcher
from parakeet.data.dataset import DatasetMixin, TransformDataset
class LJSpeechLoader:
def __init__(self, config, args, nranks, rank, is_vocoder=False, shuffle=True):
def __init__(self,
config,
args,
nranks,
rank,
is_vocoder=False,
shuffle=True):
place = fluid.CUDAPlace(rank) if args.use_gpu else fluid.CPUPlace()
LJSPEECH_ROOT = Path(args.data_path)
metadata = LJSpeechMetaData(LJSPEECH_ROOT)
transformer = LJSpeech(config)
dataset = TransformDataset(metadata, transformer)
sampler = DistributedSampler(len(metadata), nranks, rank, shuffle=shuffle)
sampler = DistributedSampler(
len(metadata), nranks, rank, shuffle=shuffle)
assert args.batch_size % nranks == 0
each_bs = args.batch_size // nranks
if is_vocoder:
dataloader = DataCargo(dataset, sampler=sampler, batch_size=each_bs, shuffle=shuffle, batch_fn=batch_examples_vocoder, drop_last=True)
dataloader = DataCargo(
dataset,
sampler=sampler,
batch_size=each_bs,
shuffle=shuffle,
batch_fn=batch_examples_vocoder,
drop_last=True)
else:
dataloader = DataCargo(dataset, sampler=sampler, batch_size=each_bs, shuffle=shuffle, batch_fn=batch_examples, drop_last=True)
dataloader = DataCargo(
dataset,
sampler=sampler,
batch_size=each_bs,
shuffle=shuffle,
batch_fn=batch_examples,
drop_last=True)
self.reader = fluid.io.DataLoader.from_generator(
capacity=32,
@@ -68,8 +101,8 @@ class LJSpeech(object):
min_level_db=config['audio']['min_level_db'],
ref_level_db=config['audio']['ref_level_db'],
n_fft=config['audio']['n_fft'],
win_length= config['audio']['win_length'],
hop_length= config['audio']['hop_length'],
win_length=config['audio']['win_length'],
hop_length=config['audio']['hop_length'],
power=config['audio']['power'],
preemphasis=config['audio']['preemphasis'],
signal_norm=True,
@@ -95,8 +128,10 @@ class LJSpeech(object):
wav = self._ljspeech_processor.load_wav(str(fname))
mag = self._ljspeech_processor.spectrogram(wav).astype(np.float32)
mel = self._ljspeech_processor.melspectrogram(wav).astype(np.float32)
phonemes = np.array(g2p.en.text_to_sequence(normalized_text), dtype=np.int64)
return (mag, mel, phonemes) # maybe we need to implement it as a map in the future
phonemes = np.array(
g2p.en.text_to_sequence(normalized_text), dtype=np.int64)
return (mag, mel, phonemes
) # maybe we need to implement it as a map in the future
def batch_examples(batch):
@@ -109,7 +144,10 @@ def batch_examples(batch):
pos_mels = []
for data in batch:
_, mel, text = data
mel_inputs.append(np.concatenate([np.zeros([mel.shape[0], 1], np.float32), mel[:,:-1]], axis=-1))
mel_inputs.append(
np.concatenate(
[np.zeros([mel.shape[0], 1], np.float32), mel[:, :-1]],
axis=-1))
mel_lens.append(mel.shape[1])
text_lens.append(len(text))
pos_texts.append(np.arange(1, len(text) + 1))
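A sketch of the decoder-input shift built above: prepend one all-zero "go" frame and drop the last frame, so that frame t of `mel_input` only contains information from frames earlier than t:

```python
import numpy as np

mel = np.arange(6, dtype=np.float32).reshape(2, 3)  # toy (num_mels, T) spectrogram
mel_input = np.concatenate(
    [np.zeros([mel.shape[0], 1], np.float32), mel[:, :-1]], axis=-1)
print(mel_input)  # [[0. 0. 1.]
                  #  [0. 3. 4.]]
```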
@@ -118,35 +156,59 @@ def batch_examples(batch):
texts.append(text)
# Sort by text_len in descending order
texts = [i for i,_ in sorted(zip(texts, text_lens), key=lambda x: x[1], reverse=True)]
mels = [i for i,_ in sorted(zip(mels, text_lens), key=lambda x: x[1], reverse=True)]
mel_inputs = [i for i,_ in sorted(zip(mel_inputs, text_lens), key=lambda x: x[1], reverse=True)]
mel_lens = [i for i,_ in sorted(zip(mel_lens, text_lens), key=lambda x: x[1], reverse=True)]
pos_texts = [i for i,_ in sorted(zip(pos_texts, text_lens), key=lambda x: x[1], reverse=True)]
pos_mels = [i for i,_ in sorted(zip(pos_mels, text_lens), key=lambda x: x[1], reverse=True)]
texts = [
i
for i, _ in sorted(
zip(texts, text_lens), key=lambda x: x[1], reverse=True)
]
mels = [
i
for i, _ in sorted(
zip(mels, text_lens), key=lambda x: x[1], reverse=True)
]
mel_inputs = [
i
for i, _ in sorted(
zip(mel_inputs, text_lens), key=lambda x: x[1], reverse=True)
]
mel_lens = [
i
for i, _ in sorted(
zip(mel_lens, text_lens), key=lambda x: x[1], reverse=True)
]
pos_texts = [
i
for i, _ in sorted(
zip(pos_texts, text_lens), key=lambda x: x[1], reverse=True)
]
pos_mels = [
i
for i, _ in sorted(
zip(pos_mels, text_lens), key=lambda x: x[1], reverse=True)
]
text_lens = sorted(text_lens, reverse=True)
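The six comprehensions above each re-sort one list by text length; computing the sort order once is an equivalent, more compact formulation (a sketch, not part of the commit):

```python
texts, mels = ["ab", "c", "abc"], ["M1", "M2", "M3"]  # toy batch
text_lens = [2, 1, 3]
order = sorted(range(len(text_lens)), key=text_lens.__getitem__, reverse=True)
texts = [texts[i] for i in order]  # ['abc', 'ab', 'c']
mels = [mels[i] for i in order]    # ['M3', 'M1', 'M2']
```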
# Pad each sequence to the longest length in the batch
texts = TextIDBatcher(pad_id=0)(texts) #(B, T)
pos_texts = TextIDBatcher(pad_id=0)(pos_texts) #(B,T)
pos_mels = TextIDBatcher(pad_id=0)(pos_mels) #(B,T)
mels = np.transpose(SpecBatcher(pad_value=0.)(mels), axes=(0,2,1)) #(B,T,num_mels)
mel_inputs = np.transpose(SpecBatcher(pad_value=0.)(mel_inputs), axes=(0,2,1))#(B,T,num_mels)
return (texts, mels, mel_inputs, pos_texts, pos_mels, np.array(text_lens), np.array(mel_lens))
texts = TextIDBatcher(pad_id=0)(texts) #(B, T)
pos_texts = TextIDBatcher(pad_id=0)(pos_texts) #(B,T)
pos_mels = TextIDBatcher(pad_id=0)(pos_mels) #(B,T)
mels = np.transpose(
SpecBatcher(pad_value=0.)(mels), axes=(0, 2, 1)) #(B,T,num_mels)
mel_inputs = np.transpose(
SpecBatcher(pad_value=0.)(mel_inputs), axes=(0, 2, 1)) #(B,T,num_mels)
return (texts, mels, mel_inputs, pos_texts, pos_mels, np.array(text_lens),
np.array(mel_lens))
def batch_examples_vocoder(batch):
mels=[]
mags=[]
mels = []
mags = []
for data in batch:
mag, mel, _ = data
mels.append(mel)
mags.append(mag)
mels = np.transpose(SpecBatcher(pad_value=0.)(mels), axes=(0,2,1))
mags = np.transpose(SpecBatcher(pad_value=0.)(mags), axes=(0,2,1))
mels = np.transpose(SpecBatcher(pad_value=0.)(mels), axes=(0, 2, 1))
mags = np.transpose(SpecBatcher(pad_value=0.)(mags), axes=(0, 2, 1))
return (mels, mags)

View File

@@ -1,38 +1,100 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
def add_config_options_to_parser(parser):
parser.add_argument('--config_path', type=str, default='config/train_transformer.yaml',
parser.add_argument(
'--config_path',
type=str,
default='config/train_transformer.yaml',
help="the yaml config file path.")
parser.add_argument('--batch_size', type=int, default=32,
help="batch size for training.")
parser.add_argument('--epochs', type=int, default=10000,
parser.add_argument(
'--batch_size', type=int, default=32, help="batch size for training.")
parser.add_argument(
'--epochs',
type=int,
default=10000,
help="the number of epoch for training.")
parser.add_argument('--lr', type=float, default=0.001,
parser.add_argument(
'--lr',
type=float,
default=0.001,
help="the learning rate for training.")
parser.add_argument('--save_step', type=int, default=500,
parser.add_argument(
'--save_step',
type=int,
default=500,
help="checkpointing interval during training.")
parser.add_argument('--image_step', type=int, default=2000,
parser.add_argument(
'--image_step',
type=int,
default=2000,
help="attention image interval during training.")
parser.add_argument('--max_len', type=int, default=400,
parser.add_argument(
'--max_len',
type=int,
default=400,
help="The max length of audio when synthsis.")
parser.add_argument('--transformer_step', type=int, default=160000,
parser.add_argument(
'--transformer_step',
type=int,
default=160000,
help="Global step to restore checkpoint of transformer.")
parser.add_argument('--vocoder_step', type=int, default=90000,
parser.add_argument(
'--vocoder_step',
type=int,
default=90000,
help="Global step to restore checkpoint of postnet.")
parser.add_argument('--use_gpu', type=int, default=1,
parser.add_argument(
'--use_gpu',
type=int,
default=1,
help="use gpu or not during training.")
parser.add_argument('--use_data_parallel', type=int, default=0,
parser.add_argument(
'--use_data_parallel',
type=int,
default=0,
help="use data parallel or not during training.")
parser.add_argument('--stop_token', type=int, default=0,
parser.add_argument(
'--stop_token',
type=int,
default=0,
help="use stop token loss in network or not.")
parser.add_argument('--data_path', type=str, default='./dataset/LJSpeech-1.1',
parser.add_argument(
'--data_path',
type=str,
default='./dataset/LJSpeech-1.1',
help="the path of dataset.")
parser.add_argument('--checkpoint_path', type=str, default=None,
parser.add_argument(
'--checkpoint_path',
type=str,
default=None,
help="the path to load checkpoint or pretrain model.")
parser.add_argument('--save_path', type=str, default='./checkpoint',
parser.add_argument(
'--save_path',
type=str,
default='./checkpoint',
help="the path to save checkpoint.")
parser.add_argument('--log_dir', type=str, default='./log',
parser.add_argument(
'--log_dir',
type=str,
default='./log',
help="the directory to save tensorboard log.")
parser.add_argument('--sample_path', type=str, default='./sample',
parser.add_argument(
'--sample_path',
type=str,
default='./sample',
help="the directory to save audio sample in synthesis.")

View File

@@ -1,3 +1,16 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from scipy.io.wavfile import write
from parakeet.g2p.en import text_to_sequence
@@ -16,6 +29,7 @@ from parakeet import audio
from parakeet.models.transformer_tts.vocoder import Vocoder
from parakeet.models.transformer_tts.transformer_tts import TransformerTTS
def load_checkpoint(step, model_path):
model_dict, _ = fluid.dygraph.load_dygraph(os.path.join(model_path, step))
new_state_dict = OrderedDict()
@@ -26,6 +40,7 @@ def load_checkpoint(step, model_path):
new_state_dict[param] = model_dict[param]
return new_state_dict
def synthesis(text_input, args):
place = (fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace())
@@ -34,36 +49,43 @@ def synthesis(text_input, args):
# tensorboard
if not os.path.exists(args.log_dir):
os.mkdir(args.log_dir)
path = os.path.join(args.log_dir,'synthesis')
os.mkdir(args.log_dir)
path = os.path.join(args.log_dir, 'synthesis')
writer = SummaryWriter(path)
with dg.guard(place):
with fluid.unique_name.guard():
model = TransformerTTS(cfg)
model.set_dict(load_checkpoint(str(args.transformer_step), os.path.join(args.checkpoint_path, "transformer")))
model.set_dict(
load_checkpoint(
str(args.transformer_step),
os.path.join(args.checkpoint_path, "transformer")))
model.eval()
with fluid.unique_name.guard():
model_vocoder = Vocoder(cfg, args.batch_size)
model_vocoder.set_dict(load_checkpoint(str(args.vocoder_step), os.path.join(args.checkpoint_path, "vocoder")))
model_vocoder.set_dict(
load_checkpoint(
str(args.vocoder_step),
os.path.join(args.checkpoint_path, "vocoder")))
model_vocoder.eval()
# init input
text = np.asarray(text_to_sequence(text_input))
text = fluid.layers.unsqueeze(dg.to_variable(text),[0])
mel_input = dg.to_variable(np.zeros([1,1,80])).astype(np.float32)
pos_text = np.arange(1, text.shape[1]+1)
pos_text = fluid.layers.unsqueeze(dg.to_variable(pos_text),[0])
text = fluid.layers.unsqueeze(dg.to_variable(text), [0])
mel_input = dg.to_variable(np.zeros([1, 1, 80])).astype(np.float32)
pos_text = np.arange(1, text.shape[1] + 1)
pos_text = fluid.layers.unsqueeze(dg.to_variable(pos_text), [0])
pbar = tqdm(range(args.max_len))
for i in pbar:
pos_mel = np.arange(1, mel_input.shape[1]+1)
pos_mel = fluid.layers.unsqueeze(dg.to_variable(pos_mel),[0])
mel_pred, postnet_pred, attn_probs, stop_preds, attn_enc, attn_dec = model(text, mel_input, pos_text, pos_mel)
mel_input = fluid.layers.concat([mel_input, postnet_pred[:,-1:,:]], axis=1)
pos_mel = np.arange(1, mel_input.shape[1] + 1)
pos_mel = fluid.layers.unsqueeze(dg.to_variable(pos_mel), [0])
mel_pred, postnet_pred, attn_probs, stop_preds, attn_enc, attn_dec = model(
text, mel_input, pos_text, pos_mel)
mel_input = fluid.layers.concat(
[mel_input, postnet_pred[:, -1:, :]], axis=1)
mag_pred = model_vocoder(postnet_pred)
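A shape-only sketch of the greedy decoding loop above: `mel_input` starts as a single zero "go" frame and grows by the last predicted frame on every step (the ones tensor below stands in for the model call):

```python
import numpy as np

mel_input = np.zeros([1, 1, 80], np.float32)  # the "go" frame
for _ in range(3):
    postnet_pred = np.ones([1, mel_input.shape[1], 80], np.float32)  # fake model output
    mel_input = np.concatenate([mel_input, postnet_pred[:, -1:, :]], axis=1)
print(mel_input.shape)  # (1, 4, 80): one new frame per decoding step
```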
_ljspeech_processor = audio.AudioProcessor(
@@ -72,8 +94,8 @@ def synthesis(text_input, args):
min_level_db=cfg['audio']['min_level_db'],
ref_level_db=cfg['audio']['ref_level_db'],
n_fft=cfg['audio']['n_fft'],
win_length= cfg['audio']['win_length'],
hop_length= cfg['audio']['hop_length'],
win_length=cfg['audio']['win_length'],
hop_length=cfg['audio']['hop_length'],
power=cfg['audio']['power'],
preemphasis=cfg['audio']['preemphasis'],
signal_norm=True,
@@ -86,13 +108,18 @@ def synthesis(text_input, args):
do_trim_silence=False,
sound_norm=False)
wav = _ljspeech_processor.inv_spectrogram(fluid.layers.transpose(fluid.layers.squeeze(mag_pred,[0]), [1,0]).numpy())
wav = _ljspeech_processor.inv_spectrogram(
fluid.layers.transpose(
fluid.layers.squeeze(mag_pred, [0]), [1, 0]).numpy())
writer.add_audio(text_input, wav, 0, cfg['audio']['sr'])
if not os.path.exists(args.sample_path):
os.mkdir(args.sample_path)
write(os.path.join(args.sample_path,'test.wav'), cfg['audio']['sr'], wav)
write(
os.path.join(args.sample_path, 'test.wav'), cfg['audio']['sr'],
wav)
writer.close()
if __name__ == '__main__':
parser = argparse.ArgumentParser(description="Synthesis model")
add_config_options_to_parser(parser)

View File

@@ -1,3 +1,16 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from tqdm import tqdm
from tensorboardX import SummaryWriter
@@ -16,8 +29,10 @@ from parakeet.models.transformer_tts.utils import cross_entropy
from data import LJSpeechLoader
from parakeet.models.transformer_tts.transformer_tts import TransformerTTS
def load_checkpoint(step, model_path):
model_dict, opti_dict = fluid.dygraph.load_dygraph(os.path.join(model_path, step))
model_dict, opti_dict = fluid.dygraph.load_dygraph(
os.path.join(model_path, step))
new_state_dict = OrderedDict()
for param in model_dict:
if param.startswith('_layers.'):
@@ -40,8 +55,8 @@ def main(args):
if args.use_gpu else fluid.CPUPlace())
if not os.path.exists(args.log_dir):
os.mkdir(args.log_dir)
path = os.path.join(args.log_dir,'transformer')
os.mkdir(args.log_dir)
path = os.path.join(args.log_dir, 'transformer')
writer = SummaryWriter(path) if local_rank == 0 else None
@@ -49,13 +64,18 @@ def main(args):
model = TransformerTTS(cfg)
model.train()
optimizer = fluid.optimizer.AdamOptimizer(learning_rate=dg.NoamDecay(1/(cfg['warm_up_step'] *( args.lr ** 2)), cfg['warm_up_step']),
parameter_list=model.parameters())
optimizer = fluid.optimizer.AdamOptimizer(
learning_rate=dg.NoamDecay(1 / (
cfg['warm_up_step'] * (args.lr**2)), cfg['warm_up_step']),
parameter_list=model.parameters())
reader = LJSpeechLoader(cfg, args, nranks, local_rank, shuffle=True).reader()
reader = LJSpeechLoader(
cfg, args, nranks, local_rank, shuffle=True).reader()
if args.checkpoint_path is not None:
model_dict, opti_dict = load_checkpoint(str(args.transformer_step), os.path.join(args.checkpoint_path, "transformer"))
model_dict, opti_dict = load_checkpoint(
str(args.transformer_step),
os.path.join(args.checkpoint_path, "transformer"))
model.set_dict(model_dict)
optimizer.set_dict(opti_dict)
global_step = args.transformer_step
@@ -68,60 +88,82 @@ def main(args):
for epoch in range(args.epochs):
pbar = tqdm(reader)
for i, data in enumerate(pbar):
pbar.set_description('Processing at epoch %d'%epoch)
pbar.set_description('Processing at epoch %d' % epoch)
character, mel, mel_input, pos_text, pos_mel, text_length, _ = data
global_step += 1
mel_pred, postnet_pred, attn_probs, stop_preds, attn_enc, attn_dec = model(character, mel_input, pos_text, pos_mel)
mel_pred, postnet_pred, attn_probs, stop_preds, attn_enc, attn_dec = model(
character, mel_input, pos_text, pos_mel)
label = (pos_mel == 0).astype(np.float32)
mel_loss = layers.mean(layers.abs(layers.elementwise_sub(mel_pred, mel)))
post_mel_loss = layers.mean(layers.abs(layers.elementwise_sub(postnet_pred, mel)))
mel_loss = layers.mean(
layers.abs(layers.elementwise_sub(mel_pred, mel)))
post_mel_loss = layers.mean(
layers.abs(layers.elementwise_sub(postnet_pred, mel)))
loss = mel_loss + post_mel_loss
# Note: training did not work when the stop token loss was used.
if args.stop_token:
stop_loss = cross_entropy(stop_preds, label)
loss = loss + stop_loss
if local_rank==0:
if local_rank == 0:
writer.add_scalars('training_loss', {
'mel_loss':mel_loss.numpy(),
'post_mel_loss':post_mel_loss.numpy()
'mel_loss': mel_loss.numpy(),
'post_mel_loss': post_mel_loss.numpy()
}, global_step)
if args.stop_token:
writer.add_scalar('stop_loss', stop_loss.numpy(), global_step)
writer.add_scalar('stop_loss',
stop_loss.numpy(), global_step)
if args.use_data_parallel:
writer.add_scalars('alphas', {
'encoder_alpha':model._layers.encoder.alpha.numpy(),
'decoder_alpha':model._layers.decoder.alpha.numpy(),
'encoder_alpha':
model._layers.encoder.alpha.numpy(),
'decoder_alpha':
model._layers.decoder.alpha.numpy(),
}, global_step)
else:
writer.add_scalars('alphas', {
'encoder_alpha':model.encoder.alpha.numpy(),
'decoder_alpha':model.decoder.alpha.numpy(),
'encoder_alpha': model.encoder.alpha.numpy(),
'decoder_alpha': model.decoder.alpha.numpy(),
}, global_step)
writer.add_scalar('learning_rate', optimizer._learning_rate.step().numpy(), global_step)
writer.add_scalar('learning_rate',
optimizer._learning_rate.step().numpy(),
global_step)
if global_step % args.image_step == 1:
for i, prob in enumerate(attn_probs):
for j in range(4):
x = np.uint8(cm.viridis(prob.numpy()[j*16]) * 255)
writer.add_image('Attention_%d_0'%global_step, x, i*4+j, dataformats="HWC")
x = np.uint8(
cm.viridis(prob.numpy()[j * 16]) * 255)
writer.add_image(
'Attention_%d_0' % global_step,
x,
i * 4 + j,
dataformats="HWC")
for i, prob in enumerate(attn_enc):
for j in range(4):
x = np.uint8(cm.viridis(prob.numpy()[j*16]) * 255)
writer.add_image('Attention_enc_%d_0'%global_step, x, i*4+j, dataformats="HWC")
x = np.uint8(
cm.viridis(prob.numpy()[j * 16]) * 255)
writer.add_image(
'Attention_enc_%d_0' % global_step,
x,
i * 4 + j,
dataformats="HWC")
for i, prob in enumerate(attn_dec):
for j in range(4):
x = np.uint8(cm.viridis(prob.numpy()[j*16]) * 255)
writer.add_image('Attention_dec_%d_0'%global_step, x, i*4+j, dataformats="HWC")
x = np.uint8(
cm.viridis(prob.numpy()[j * 16]) * 255)
writer.add_image(
'Attention_dec_%d_0' % global_step,
x,
i * 4 + j,
dataformats="HWC")
if args.use_data_parallel:
loss = model.scale_loss(loss)
@@ -129,21 +171,25 @@ def main(args):
model.apply_collective_grads()
else:
loss.backward()
optimizer.minimize(loss, grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(cfg['grad_clip_thresh']))
optimizer.minimize(
loss,
grad_clip=fluid.dygraph_grad_clip.GradClipByGlobalNorm(cfg[
'grad_clip_thresh']))
model.clear_gradients()
# save checkpoint
if local_rank==0 and global_step % args.save_step == 0:
if local_rank == 0 and global_step % args.save_step == 0:
if not os.path.exists(args.save_path):
os.mkdir(args.save_path)
save_path = os.path.join(args.save_path,'transformer/%d' % global_step)
save_path = os.path.join(args.save_path,
'transformer/%d' % global_step)
dg.save_dygraph(model.state_dict(), save_path)
dg.save_dygraph(optimizer.state_dict(), save_path)
if local_rank==0:
if local_rank == 0:
writer.close()
if __name__ =='__main__':
if __name__ == '__main__':
parser = argparse.ArgumentParser(description="Train TransformerTTS model")
add_config_options_to_parser(parser)

View File

@@ -1,3 +1,16 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from tensorboardX import SummaryWriter
import os
from tqdm import tqdm
@@ -13,6 +26,7 @@ import paddle.fluid.layers as layers
from data import LJSpeechLoader
from parakeet.models.transformer_tts.vocoder import Vocoder
def load_checkpoint(step, model_path):
model_dict, opti_dict = dg.load_dygraph(os.path.join(model_path, step))
new_state_dict = OrderedDict()
@@ -23,6 +37,7 @@ def load_checkpoint(step, model_path):
new_state_dict[param] = model_dict[param]
return new_state_dict, opti_dict
def main(args):
local_rank = dg.parallel.Env().local_rank if args.use_data_parallel else 0
@@ -37,8 +52,8 @@ def main(args):
if args.use_gpu else fluid.CPUPlace())
if not os.path.exists(args.log_dir):
os.mkdir(args.log_dir)
path = os.path.join(args.log_dir,'vocoder')
os.mkdir(args.log_dir)
path = os.path.join(args.log_dir, 'vocoder')
writer = SummaryWriter(path) if local_rank == 0 else None
@@ -46,12 +61,15 @@ def main(args):
model = Vocoder(cfg, args.batch_size)
model.train()
optimizer = fluid.optimizer.AdamOptimizer(learning_rate=dg.NoamDecay(1/(cfg['warm_up_step'] *( args.lr ** 2)), cfg['warm_up_step']),
parameter_list=model.parameters())
optimizer = fluid.optimizer.AdamOptimizer(
learning_rate=dg.NoamDecay(1 / (
cfg['warm_up_step'] * (args.lr**2)), cfg['warm_up_step']),
parameter_list=model.parameters())
if args.checkpoint_path is not None:
model_dict, opti_dict = load_checkpoint(str(args.vocoder_step), os.path.join(args.checkpoint_path, "vocoder"))
model_dict, opti_dict = load_checkpoint(
str(args.vocoder_step),
os.path.join(args.checkpoint_path, "vocoder"))
model.set_dict(model_dict)
optimizer.set_dict(opti_dict)
global_step = args.vocoder_step
@@ -61,19 +79,21 @@ def main(args):
strategy = dg.parallel.prepare_context()
model = fluid.dygraph.parallel.DataParallel(model, strategy)
reader = LJSpeechLoader(cfg, args, nranks, local_rank, is_vocoder=True).reader()
reader = LJSpeechLoader(
cfg, args, nranks, local_rank, is_vocoder=True).reader()
for epoch in range(args.epochs):
pbar = tqdm(reader)
for i, data in enumerate(pbar):
pbar.set_description('Processing at epoch %d'%epoch)
pbar.set_description('Processing at epoch %d' % epoch)
mel, mag = data
mag = dg.to_variable(mag.numpy())
mel = dg.to_variable(mel.numpy())
global_step += 1
mag_pred = model(mel)
loss = layers.mean(layers.abs(layers.elementwise_sub(mag_pred, mag)))
loss = layers.mean(
layers.abs(layers.elementwise_sub(mag_pred, mag)))
if args.use_data_parallel:
loss = model.scale_loss(loss)
@@ -81,24 +101,29 @@ def main(args):
model.apply_collective_grads()
else:
loss.backward()
optimizer.minimize(loss, grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(cfg['grad_clip_thresh']))
optimizer.minimize(
loss,
grad_clip=fluid.dygraph_grad_clip.GradClipByGlobalNorm(cfg[
'grad_clip_thresh']))
model.clear_gradients()
if local_rank==0:
writer.add_scalars('training_loss',{
'loss':loss.numpy(),
if local_rank == 0:
writer.add_scalars('training_loss', {
'loss': loss.numpy(),
}, global_step)
if global_step % args.save_step == 0:
if not os.path.exists(args.save_path):
os.mkdir(args.save_path)
save_path = os.path.join(args.save_path,'vocoder/%d' % global_step)
save_path = os.path.join(args.save_path,
'vocoder/%d' % global_step)
dg.save_dygraph(model.state_dict(), save_path)
dg.save_dygraph(optimizer.state_dict(), save_path)
if local_rank==0:
if local_rank == 0:
writer.close()
if __name__ == '__main__':
parser = argparse.ArgumentParser(description="Train vocoder model")
add_config_options_to_parser(parser)

View File

@@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import random
from pprint import pprint

View File

@@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import random
from pprint import pprint

View File

@@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import random
import subprocess

View File

@@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import itertools
import os
import time

View File

@@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__version__ = "0.0.0"
from . import data, g2p, models, modules

View File

@@ -1 +1,15 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .audio import AudioProcessor

View File

@@ -1,30 +1,46 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import librosa
import soundfile as sf
import numpy as np
import scipy.io
import scipy.signal
class AudioProcessor(object):
def __init__(self,
sample_rate=None, # int, sampling rate
num_mels=None, # int, bands of mel spectrogram
min_level_db=None, # float, minimum level db
ref_level_db=None, # float, reference level db
n_fft=None, # int: number of samples in a frame for stft
win_length=None, # int: the same meaning with n_fft
hop_length=None, # int: number of samples between neighboring frame
power=None, # float:power to raise before griffin-lim
preemphasis=None, # float: preemphasis coefficient
signal_norm=None, #
symmetric_norm=False, # bool, apply clip norm in [-max_norm, max_norm]
max_norm=None, # float, max norm
mel_fmin=None, # int: mel spectrogram's minimum frequency
mel_fmax=None, # int: mel spectrogram's maximum frequency
clip_norm=True, # bool: clip spectrogram's norm
griffin_lim_iters=None, # int:
do_trim_silence=False, # bool: trim silence
sound_norm=False,
**kwargs):
def __init__(
self,
sample_rate=None, # int, sampling rate
num_mels=None, # int, bands of mel spectrogram
min_level_db=None, # float, minimum level db
ref_level_db=None, # float, reference level db
n_fft=None, # int: number of samples in a frame for stft
win_length=None, # int: the same meaning with n_fft
hop_length=None, # int: number of samples between neighboring frame
power=None, # float:power to raise before griffin-lim
preemphasis=None, # float: preemphasis coefficient
signal_norm=None, #
symmetric_norm=False, # bool, apply clip norm in [-max_norm, max_norm]
max_norm=None, # float, max norm
mel_fmin=None, # int: mel spectrogram's minimum frequency
mel_fmax=None, # int: mel spectrogram's maximum frequency
clip_norm=True, # bool: clip spectrogram's norm
griffin_lim_iters=None, # int:
do_trim_silence=False, # bool: trim silence
sound_norm=False,
**kwargs):
self.sample_rate = sample_rate
self.num_mels = num_mels
self.min_level_db = min_level_db
@ -52,7 +68,8 @@ class AudioProcessor(object):
self.do_trim_silence = do_trim_silence
self.sound_norm = sound_norm
self.num_freq, self.frame_length_ms, self.frame_shift_ms = self._stft_parameters()
self.num_freq, self.frame_length_ms, self.frame_shift_ms = self._stft_parameters(
)
def _stft_parameters(self):
"""compute frame length and hop length in ms"""
@ -65,44 +82,54 @@ class AudioProcessor(object):
"""object repr"""
cls_name_str = self.__class__.__name__
members = vars(self)
dict_str = "\n".join([" {}: {},".format(k, v) for k, v in members.items()])
dict_str = "\n".join(
[" {}: {},".format(k, v) for k, v in members.items()])
repr_str = "{}(\n{})\n".format(cls_name_str, dict_str)
return repr_str
def save_wav(self, path, wav):
"""save audio with scipy.io.wavfile in 16bit integers"""
wav_norm = wav * (32767 / max(0.01, np.max(np.abs(wav))))
scipy.io.wavfile.write(path, self.sample_rate, wav_norm.astype(np.int16))
scipy.io.wavfile.write(path, self.sample_rate,
wav_norm.astype(np.int16))
def load_wav(self, path, sr=None):
"""load wav -> trim_silence -> rescale"""
x, sr = librosa.load(path, sr=None)
assert self.sample_rate == sr, "audio sample rate: {}Hz != processor sample rate: {}Hz".format(sr, self.sample_rate)
assert self.sample_rate == sr, "audio sample rate: {}Hz != processor sample rate: {}Hz".format(
sr, self.sample_rate)
if self.do_trim_silence:
try:
x = self.trim_silence(x)
except ValueError:
print(" [!] File cannot be trimmed for silence - {}".format(path))
print(" [!] File cannot be trimmed for silence - {}".format(
path))
if self.sound_norm:
x = x / x.max() * 0.9 # scale peak to 0.9 to leave headroom
x = x / x.max() * 0.9  # scale peak to 0.9 to leave headroom
return x
def trim_silence(self, wav):
"""Trim soilent parts with a threshold and 0.01s margin"""
margin = int(self.sample_rate * 0.01)
wav = wav[margin: -margin]
trimmed_wav = librosa.effects.trim(wav, top_db=60, frame_length=self.win_length, hop_length=self.hop_length)[0]
wav = wav[margin:-margin]
trimmed_wav = librosa.effects.trim(
wav,
top_db=60,
frame_length=self.win_length,
hop_length=self.hop_length)[0]
return trimmed_wav
def apply_preemphasis(self, x):
if self.preemphasis == 0.:
raise RuntimeError(" !! Preemphasis coefficient should be positive. ")
raise RuntimeError(
" !! Preemphasis coefficient should be positive. ")
return scipy.signal.lfilter([1., -self.preemphasis], [1.], x)
def apply_inv_preemphasis(self, x):
if self.preemphasis == 0.:
raise RuntimeError(" !! Preemphasis coefficient should be positive. ")
raise RuntimeError(
" !! Preemphasis coefficient should be positive. ")
return scipy.signal.lfilter([1.], [1., -self.preemphasis], x)
def _amplitude_to_db(self, x):
@ -125,12 +152,11 @@ class AudioProcessor(object):
"""return mel basis for mel scale"""
if self.mel_fmax is not None:
assert self.mel_fmax <= self.sample_rate // 2
return librosa.filters.mel(
self.sample_rate,
self.n_fft,
n_mels=self.num_mels,
fmin=self.mel_fmin,
fmax=self.mel_fmax)
return librosa.filters.mel(self.sample_rate,
self.n_fft,
n_mels=self.num_mels,
fmin=self.mel_fmin,
fmax=self.mel_fmax)
def _normalize(self, S):
"""put values in [0, self.max_norm] or [-self.max_norm, self,max_norm]"""
@ -156,12 +182,15 @@ class AudioProcessor(object):
if self.symmetric_norm:
if self.clip_norm:
S_denorm = np.clip(S_denorm, -self.max_norm, self.max_norm)
S_denorm = (S_denorm + self.max_norm) * (-self.min_level_db) / (2 * self.max_norm) + self.min_level_db
S_denorm = (S_denorm + self.max_norm) * (
-self.min_level_db) / (2 * self.max_norm
) + self.min_level_db
return S_denorm
else:
if self.clip_norm:
S_denorm = np.clip(S_denorm, 0, self.max_norm)
S_denorm = S_denorm * (-self.min_level_db)/ self.max_norm + self.min_level_db
S_denorm = S_denorm * (-self.min_level_db
) / self.max_norm + self.min_level_db
return S_denorm
else:
return S
@ -174,7 +203,8 @@ class AudioProcessor(object):
hop_length=self.hop_length)
def _istft(self, S):
return librosa.istft(S, hop_length=self.hop_length, win_length=self.win_length)
return librosa.istft(
S, hop_length=self.hop_length, win_length=self.win_length)
def spectrogram(self, y):
"""compute linear spectrogram(amplitude)
@ -195,7 +225,8 @@ class AudioProcessor(object):
D = self._stft(self.apply_preemphasis(y))
else:
D = self._stft(y)
S = self._amplitude_to_db(self._linear_to_mel(np.abs(D))) - self.ref_level_db
S = self._amplitude_to_db(self._linear_to_mel(np.abs(
D))) - self.ref_level_db
return self._normalize(S)
def inv_spectrogram(self, spectrogram):
@ -203,16 +234,16 @@ class AudioProcessor(object):
S = self._denormalize(spectrogram)
S = self._db_to_amplitude(S + self.ref_level_db)
if self.preemphasis:
return self.apply_inv_preemphasis(self._griffin_lim(S ** self.power))
return self._griffin_lim(S ** self.power)
return self.apply_inv_preemphasis(self._griffin_lim(S**self.power))
return self._griffin_lim(S**self.power)
def inv_melspectrogram(self, mel_spectrogram):
S = self._denormalize(mel_spectrogram)
S = self._db_to_amplitude(S + self.ref_level_db)
S = self._mel_to_linear(np.abs(S))
if self.preemphasis:
return self.apply_inv_preemphasis(self._griffin_lim(S ** self.power))
return self._griffin_lim(S ** self.power)
return self.apply_inv_preemphasis(self._griffin_lim(S**self.power))
return self._griffin_lim(S**self.power)
def out_linear_to_mel(self, linear_spec):
"""convert output linear spec to mel spec"""
@ -234,18 +265,18 @@ class AudioProcessor(object):
@staticmethod
def mulaw_encode(wav, qc):
mu = 2 ** qc - 1
mu = 2**qc - 1
# wav_abs = np.minimum(np.abs(wav), 1.0)
signal = np.sign(wav) * np.log(1 + mu * np.abs(wav)) / np.log(1. + mu)
# Quantize signal to the specified number of levels.
signal = (signal + 1) / 2 * mu + 0.5
return np.floor(signal,)
return np.floor(signal, )
@staticmethod
def mulaw_decode(wav, qc):
"""Recovers waveform from quantized values."""
mu = 2 ** qc - 1
x = np.sign(wav) / mu * ((1 + mu) ** np.abs(wav) - 1)
mu = 2**qc - 1
x = np.sign(wav) / mu * ((1 + mu)**np.abs(wav) - 1)
return x
@staticmethod
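Since the µ-law pair above is easy to misread, here is a roundtrip sketch in plain numpy; it assumes the usual convention that `mulaw_decode` receives values rescaled back to [-1, 1], which this hunk does not show:

```python
import numpy as np

mu = 2**8 - 1                                  # qc = 8 -> 255 levels
wav = np.linspace(-1., 1., 5)                  # toy waveform in [-1, 1]

# encode: compand, then quantize to integers in [0, mu]
companded = np.sign(wav) * np.log(1 + mu * np.abs(wav)) / np.log(1. + mu)
quantized = np.floor((companded + 1) / 2 * mu + 0.5)

# decode: rescale back to [-1, 1], then expand
rescaled = 2 * quantized / mu - 1
recovered = np.sign(rescaled) / mu * ((1 + mu)**np.abs(rescaled) - 1)
print(np.abs(recovered - wav).max())           # small quantization error
```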

View File

@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .dataset import *
from .datacargo import *
from .sampler import *

View File

@ -1,10 +1,25 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Functions for batching arrays that satisfy certain conditions.
"""
import numpy as np
class TextIDBatcher(object):
"""A wrapper class for a function to build a functor, which holds the configs to pass to the function."""
def __init__(self, pad_id=0, dtype=np.int64):
self.pad_id = pad_id
self.dtype = dtype
@ -13,6 +28,7 @@ class TextIDBatcher(object):
out = batch_text_id(minibatch, pad_id=self.pad_id, dtype=self.dtype)
return out
def batch_text_id(minibatch, pad_id=0, dtype=np.int64):
"""
minibatch: List[Example]
@ -21,16 +37,21 @@ def batch_text_id(minibatch, pad_id=0, dtype=np.int64):
peek_example = minibatch[0]
assert len(peek_example.shape) == 1, "text example is a 1D tensor"
lengths = [example.shape[0] for example in minibatch] # assume (channel, n_samples) or (n_samples, )
lengths = [example.shape[0] for example in minibatch
] # assume (channel, n_samples) or (n_samples, )
max_len = np.max(lengths)
batch = []
for example in minibatch:
pad_len = max_len - example.shape[0]
batch.append(np.pad(example, [(0, pad_len)], mode='constant', constant_values=pad_id))
batch.append(
np.pad(example, [(0, pad_len)],
mode='constant',
constant_values=pad_id))
return np.array(batch, dtype=dtype)
class WavBatcher(object):
def __init__(self, pad_value=0., dtype=np.float32):
self.pad_value = pad_value
@ -40,6 +61,7 @@ class WavBatcher(object):
out = batch_wav(minibatch, pad_value=self.pad_value, dtype=self.dtype)
return out
def batch_wav(minibatch, pad_value=0., dtype=np.float32):
"""
minibatch: List[Example]
@ -52,16 +74,23 @@ def batch_wav(minibatch, pad_value=0., dtype=np.float32):
elif len(peek_example.shape) == 2:
mono_channel = False
lengths = [example.shape[-1] for example in minibatch] # assume (channel, n_samples) or (n_samples, )
lengths = [example.shape[-1] for example in minibatch
] # assume (channel, n_samples) or (n_samples, )
max_len = np.max(lengths)
batch = []
for example in minibatch:
pad_len = max_len - example.shape[-1]
if mono_channel:
batch.append(np.pad(example, [(0, pad_len)], mode='constant', constant_values=pad_value))
batch.append(
np.pad(example, [(0, pad_len)],
mode='constant',
constant_values=pad_value))
else:
batch.append(np.pad(example, [(0, 0), (0, pad_len)], mode='constant', constant_values=pad_value)) # what about PCM, no
batch.append(
np.pad(example, [(0, 0), (0, pad_len)],
mode='constant',
constant_values=pad_value)) # what about PCM, no
return np.array(batch, dtype=dtype)
@ -75,6 +104,7 @@ class SpecBatcher(object):
out = batch_spec(minibatch, pad_value=self.pad_value, dtype=self.dtype)
return out
def batch_spec(minibatch, pad_value=0., dtype=np.float32):
"""
minibatch: List[Example]
@ -87,15 +117,22 @@ def batch_spec(minibatch, pad_value=0., dtype=np.float32):
elif len(peek_example.shape) == 3:
mono_channel = False
lengths = [example.shape[-1] for example in minibatch] # assume (channel, F, n_frame) or (F, n_frame)
lengths = [example.shape[-1] for example in minibatch
] # assume (channel, F, n_frame) or (F, n_frame)
max_len = np.max(lengths)
batch = []
for example in minibatch:
pad_len = max_len - example.shape[-1]
if mono_channel:
batch.append(np.pad(example, [(0, 0), (0, pad_len)], mode='constant', constant_values=pad_value))
batch.append(
np.pad(example, [(0, 0), (0, pad_len)],
mode='constant',
constant_values=pad_value))
else:
batch.append(np.pad(example, [(0, 0), (0, 0), (0, pad_len)], mode='constant', constant_values=pad_value)) # what about PCM, no
batch.append(
np.pad(example, [(0, 0), (0, 0), (0, pad_len)],
mode='constant',
constant_values=pad_value)) # what about PCM, no
return np.array(batch, dtype=dtype)
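A quick sketch of how these batchers are used (toy arrays; the import path is the one used elsewhere in this repo, and `SpecBatcher`'s constructor is assumed to mirror `WavBatcher`'s `pad_value`/`dtype`):

```python
import numpy as np
from parakeet.data.batch import TextIDBatcher, SpecBatcher

# ragged text-id sequences -> right-padded (2, 4) int64 array
texts = [np.array([2, 3, 5, 7]), np.array([11, 13])]
print(TextIDBatcher(pad_id=0)(texts).shape)     # (2, 4)

# (F, n_frame) spectrograms with different n_frame -> padded on the last axis
specs = [np.zeros((80, 50), np.float32), np.zeros((80, 30), np.float32)]
print(SpecBatcher(pad_value=0.)(specs).shape)   # (2, 80, 50)
```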

View File

@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import six
from .sampler import SequentialSampler, RandomSampler, BatchSampler

View File

@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import six
import numpy as np
@ -9,8 +23,7 @@ class DatasetMixin(object):
if isinstance(index, slice):
start, stop, step = index.indices(len(self))
return [
self.get_example(i)
for i in six.moves.range(start, stop, step)
self.get_example(i) for i in six.moves.range(start, stop, step)
]
elif isinstance(index, (list, np.ndarray)):
return [self.get_example(i) for i in index]
@ -180,8 +193,7 @@ class ChainDataset(DatasetMixin):
def get_example(self, i):
if i < 0:
raise IndexError(
"ChainDataset doesnot support negative indexing.")
raise IndexError("ChainDataset doesnot support negative indexing.")
for dataset in self._datasets:
if i < len(dataset):

View File

@ -1,3 +1,16 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
In most cases, we have a non-stream dataset, which means we can randomly access it with __getitem__ and get the length of the dataset with __len__.
@ -6,10 +19,10 @@ This suffices for a sampler. We implement a sampler as an iterable of valid indices.
So the sampler is only responsible for generating valid indices.
"""
import numpy as np
import random
class Sampler(object):
def __init__(self, data_source):
pass
@ -42,12 +55,14 @@ class RandomSampler(Sampler):
"replacement={}".format(self.replacement))
if self._num_samples is not None and not replacement:
raise ValueError("With replacement=False, num_samples should not be specified, "
"since a random permutation will be performed.")
raise ValueError(
"With replacement=False, num_samples should not be specified, "
"since a random permutation will be performed.")
if not isinstance(self.num_samples, int) or self.num_samples <= 0:
raise ValueError("num_samples should be a positive integer "
"value, but got num_samples={}".format(self.num_samples))
"value, but got num_samples={}".format(
self.num_samples))
@property
def num_samples(self):
@ -59,7 +74,9 @@ class RandomSampler(Sampler):
def __iter__(self):
n = len(self.data_source)
if self.replacement:
return iter(np.random.randint(0, n, size=(self.num_samples,), dtype=np.int64).tolist())
return iter(
np.random.randint(
0, n, size=(self.num_samples, ), dtype=np.int64).tolist())
return iter(np.random.permutation(n).tolist())
def __len__(self):
@ -76,7 +93,8 @@ class SubsetRandomSampler(Sampler):
self.indices = indices
def __iter__(self):
return (self.indices[i] for i in np.random.permutation(len(self.indices)))
return (self.indices[i]
for i in np.random.permutation(len(self.indices)))
def __len__(self):
return len(self.indices)
@ -89,9 +107,14 @@ class PartialyRandomizedSimilarTimeLengthSampler(Sampler):
3. Permute mini-batches
"""
def __init__(self, lengths, batch_size=4, batch_group_size=None,
def __init__(self,
lengths,
batch_size=4,
batch_group_size=None,
permutate=True):
_lengths = np.array(lengths, dtype=np.int64) # maybe better implement length as a sort key
_lengths = np.array(
lengths,
dtype=np.int64) # maybe better implement length as a sort key
self.lengths = np.sort(_lengths)
self.sorted_indices = np.argsort(_lengths)
@ -112,13 +135,14 @@ class PartialyRandomizedSimilarTimeLengthSampler(Sampler):
for i in range(len(indices) // batch_group_size):
s = i * batch_group_size
e = s + batch_group_size
random.shuffle(indices[s: e]) # inplace
random.shuffle(indices[s:e]) # inplace
# Permutate batches
if self.permutate:
perm = np.arange(len(indices[:e]) // self.batch_size)
random.shuffle(perm)
indices[:e] = indices[:e].reshape(-1, self.batch_size)[perm, :].reshape(-1)
indices[:e] = indices[:e].reshape(
-1, self.batch_size)[perm, :].reshape(-1)
# Handle last elements
s += batch_group_size
@ -150,14 +174,19 @@ class WeightedRandomSampler(Sampler):
def __init__(self, weights, num_samples, replacement):
if not isinstance(num_samples, int) or num_samples <= 0:
raise ValueError("num_samples should be a positive integer "
"value, but got num_samples={}".format(num_samples))
"value, but got num_samples={}".format(
num_samples))
self.weights = np.array(weights, dtype=np.float64)
self.num_samples = num_samples
self.replacement = replacement
def __iter__(self):
return iter(np.random.choice(len(self.weights), size=(self.num_samples, ),
replace=self.replacement, p=self.weights).tolist())
return iter(
np.random.choice(
len(self.weights),
size=(self.num_samples, ),
replace=self.replacement,
p=self.weights).tolist())
def __len__(self):
return self.num_samples
@ -184,7 +213,7 @@ class DistributedSampler(Sampler):
# Subset samples for each trainer.
indices = indices[self.rank:self.total_size:self.num_trainers]
assert len(indices) == self.num_samples
assert len(indices) == self.num_samples
return iter(indices)
@ -209,8 +238,7 @@ class BatchSampler(Sampler):
def __init__(self, sampler, batch_size, drop_last):
if not isinstance(sampler, Sampler):
raise ValueError("sampler should be an instance of "
"Sampler, but got sampler={}"
.format(sampler))
"Sampler, but got sampler={}".format(sampler))
if not isinstance(batch_size, int) or batch_size <= 0:
raise ValueError("batch_size should be a positive integer value, "
"but got batch_size={}".format(batch_size))

View File

@ -15,8 +15,3 @@ One of the reasons we choose to load data lazily (only load metadata beforehand
In deep learning practice, we typically batch examples, so the dataset should come with a method to batch them. Assume a record is implemented as a tuple with several items. When an item is a fixed-size array, batching is trivial: `np.stack` suffices. But for arrays with dynamic sizes, padding is needed first. We decide to implement a batching method for each item; batching a record can then be composed from these methods. For a dataset, a `_batch_examples` method should be implemented, but in most cases you can choose one from `batching.py`, as in the sketch below.
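A minimal sketch of the two cases in plain numpy (toy shapes, not tied to this repo's datasets):

```python
import numpy as np

# fixed-size items: np.stack suffices
mels = [np.zeros((80, 100), np.float32) for _ in range(4)]
print(np.stack(mels).shape)                     # (4, 80, 100)

# dynamically sized items: pad to the longest, then stack
texts = [np.array([2, 3, 5]), np.array([7, 11])]
max_len = max(len(t) for t in texts)
padded = np.stack([
    np.pad(t, (0, max_len - len(t)), mode="constant", constant_values=0)
    for t in texts
])
print(padded.shape)                             # (2, 3)
```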
That is it!

View File

@ -0,0 +1,13 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

View File

@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pathlib import Path
import numpy as np
import pandas as pd

View File

@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pathlib import Path
import pandas as pd
from ruamel.yaml import YAML
@ -11,9 +25,11 @@ from parakeet.data.dataset import Dataset
from parakeet.data.datacargo import DataCargo
from parakeet.data.batch import TextIDBatcher, WavBatcher
class VCTK(Dataset):
def __init__(self, root):
assert isinstance(root, (str, Path)), "root should be a string or Path object"
assert isinstance(root, (
str, Path)), "root should be a string or Path object"
self.root = root if isinstance(root, Path) else Path(root)
self.text_root = self.root.joinpath("txt")
self.wav_root = self.root.joinpath("wav48")
@ -24,10 +40,10 @@ class VCTK(Dataset):
self.speaker_indices, self.metadata = self._load_metadata()
def _load_metadata(self):
yaml=YAML(typ='safe')
yaml = YAML(typ='safe')
speaker_indices = yaml.load(self.root.joinpath("speaker_indices.yaml"))
metadata = pd.read_csv(self.root.joinpath("metadata.csv"),
sep="|", quoting=3, header=1)
metadata = pd.read_csv(
self.root.joinpath("metadata.csv"), sep="|", quoting=3, header=1)
return speaker_indices, metadata
def _prepare_metadata(self):
@ -41,15 +57,19 @@ class VCTK(Dataset):
with io.open(str(text_file)) as f:
transcription = f.read().strip()
wav_file = text_file.with_suffix(".wav")
metadata.append((wav_file.name, speaker_folder.name, transcription))
metadata = pd.DataFrame.from_records(metadata,
columns=["wave_file", "speaker", "text"])
metadata.append(
(wav_file.name, speaker_folder.name, transcription))
metadata = pd.DataFrame.from_records(
metadata, columns=["wave_file", "speaker", "text"])
# save them
yaml=YAML(typ='safe')
yaml = YAML(typ='safe')
yaml.dump(speaker_to_index, self.root.joinpath("speaker_indices.yaml"))
metadata.to_csv(self.root.joinpath("metadata.csv"),
sep="|", quoting=3, index=False)
metadata.to_csv(
self.root.joinpath("metadata.csv"),
sep="|",
quoting=3,
index=False)
def _get_example(self, metadatum):
wave_file, speaker, text = metadatum
@ -77,5 +97,3 @@ class VCTK(Dataset):
speaker_batch = np.array(speaker_batch)
phoneme_batch = TextIDBatcher(pad_id=0)(phoneme_batch)
return wav_batch, speaker_batch, phoneme_batch

View File

@ -1,5 +1,4 @@
# coding: utf-8
"""Text processing frontend
All frontend module should have the following functions:

View File

@ -32,6 +32,3 @@ def text_to_sequence(text, p=0.0):
from ..text import text_to_sequence
text = text_to_sequence(text, ["english_cleaners"])
return text

View File

@ -12,6 +12,3 @@ def text_to_sequence(text, p=0.0):
from ..text import text_to_sequence
text = text_to_sequence(text, ["basic_cleaners"])
return text

View File

@ -1,6 +1,5 @@
# coding: utf-8
import MeCab
import jaconv
from random import random
@ -30,9 +29,9 @@ def _yomi(mecab_result):
def _mix_pronunciation(tokens, yomis, p):
return "".join(
yomis[idx] if yomis[idx] is not None and random() < p else tokens[idx]
for idx in range(len(tokens)))
return "".join(yomis[idx]
if yomis[idx] is not None and random() < p else tokens[idx]
for idx in range(len(tokens)))
def mix_pronunciation(text, p):
@ -59,8 +58,7 @@ def normalize_delimitor(text):
def text_to_sequence(text, p=0.0):
for c in [" ", " ", "", "", "", "", "", "", "",
"", "", "(", ")"]:
for c in [" ", " ", "", "", "", "", "", "", "", "", "", "(", ")"]:
text = text.replace(c, "")
text = text.replace("!", "")
text = text.replace("?", "")

View File

@ -1,6 +1,5 @@
# coding: utf-8
from random import random
n_vocab = 0xffff
@ -13,5 +12,6 @@ _tagger = None
def text_to_sequence(text, p=0.0):
return [ord(c) for c in text] + [_eos] # EOS
def sequence_to_text(seq):
return "".join(chr(n) for n in seq)

View File

@ -1,8 +1,21 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
from . import cleaners
from .symbols import symbols
# Mappings from symbol to numeric ID and vice versa:
_symbol_to_id = {s: i for i, s in enumerate(symbols)}
_id_to_symbol = {i: s for i, s in enumerate(symbols)}
@ -32,7 +45,8 @@ def text_to_sequence(text, cleaner_names):
if not m:
sequence += _symbols_to_sequence(_clean_text(text, cleaner_names))
break
sequence += _symbols_to_sequence(_clean_text(m.group(1), cleaner_names))
sequence += _symbols_to_sequence(
_clean_text(m.group(1), cleaner_names))
sequence += _arpabet_to_sequence(m.group(2))
text = m.group(3)

View File

@ -1,3 +1,16 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''
Cleaners are transformations that run over the input text at both training and eval time.
@ -14,31 +27,31 @@ import re
from unidecode import unidecode
from .numbers import normalize_numbers
# Regular expression matching whitespace:
_whitespace_re = re.compile(r'\s+')
# List of (regular expression, replacement) pairs for abbreviations:
_abbreviations = [(re.compile('\\b%s\\.' % x[0], re.IGNORECASE), x[1]) for x in [
('mrs', 'misess'),
('mr', 'mister'),
('dr', 'doctor'),
('st', 'saint'),
('co', 'company'),
('jr', 'junior'),
('maj', 'major'),
('gen', 'general'),
('drs', 'doctors'),
('rev', 'reverend'),
('lt', 'lieutenant'),
('hon', 'honorable'),
('sgt', 'sergeant'),
('capt', 'captain'),
('esq', 'esquire'),
('ltd', 'limited'),
('col', 'colonel'),
('ft', 'fort'),
]]
_abbreviations = [(re.compile('\\b%s\\.' % x[0], re.IGNORECASE), x[1])
for x in [
('mrs', 'misess'),
('mr', 'mister'),
('dr', 'doctor'),
('st', 'saint'),
('co', 'company'),
('jr', 'junior'),
('maj', 'major'),
('gen', 'general'),
('drs', 'doctors'),
('rev', 'reverend'),
('lt', 'lieutenant'),
('hon', 'honorable'),
('sgt', 'sergeant'),
('capt', 'captain'),
('esq', 'esquire'),
('ltd', 'limited'),
('col', 'colonel'),
('ft', 'fort'),
]]
def expand_abbreviations(text):

View File

@ -1,14 +1,28 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
valid_symbols = [
'AA', 'AA0', 'AA1', 'AA2', 'AE', 'AE0', 'AE1', 'AE2', 'AH', 'AH0', 'AH1', 'AH2',
'AO', 'AO0', 'AO1', 'AO2', 'AW', 'AW0', 'AW1', 'AW2', 'AY', 'AY0', 'AY1', 'AY2',
'B', 'CH', 'D', 'DH', 'EH', 'EH0', 'EH1', 'EH2', 'ER', 'ER0', 'ER1', 'ER2', 'EY',
'EY0', 'EY1', 'EY2', 'F', 'G', 'HH', 'IH', 'IH0', 'IH1', 'IH2', 'IY', 'IY0', 'IY1',
'IY2', 'JH', 'K', 'L', 'M', 'N', 'NG', 'OW', 'OW0', 'OW1', 'OW2', 'OY', 'OY0',
'OY1', 'OY2', 'P', 'R', 'S', 'SH', 'T', 'TH', 'UH', 'UH0', 'UH1', 'UH2', 'UW',
'UW0', 'UW1', 'UW2', 'V', 'W', 'Y', 'Z', 'ZH'
'AA', 'AA0', 'AA1', 'AA2', 'AE', 'AE0', 'AE1', 'AE2', 'AH', 'AH0', 'AH1',
'AH2', 'AO', 'AO0', 'AO1', 'AO2', 'AW', 'AW0', 'AW1', 'AW2', 'AY', 'AY0',
'AY1', 'AY2', 'B', 'CH', 'D', 'DH', 'EH', 'EH0', 'EH1', 'EH2', 'ER', 'ER0',
'ER1', 'ER2', 'EY', 'EY0', 'EY1', 'EY2', 'F', 'G', 'HH', 'IH', 'IH0',
'IH1', 'IH2', 'IY', 'IY0', 'IY1', 'IY2', 'JH', 'K', 'L', 'M', 'N', 'NG',
'OW', 'OW0', 'OW1', 'OW2', 'OY', 'OY0', 'OY1', 'OY2', 'P', 'R', 'S', 'SH',
'T', 'TH', 'UH', 'UH0', 'UH1', 'UH2', 'UW', 'UW0', 'UW1', 'UW2', 'V', 'W',
'Y', 'Z', 'ZH'
]
_valid_symbol_set = set(valid_symbols)
@ -24,7 +38,10 @@ class CMUDict:
else:
entries = _parse_cmudict(file_or_path)
if not keep_ambiguous:
entries = {word: pron for word, pron in entries.items() if len(pron) == 1}
entries = {
word: pron
for word, pron in entries.items() if len(pron) == 1
}
self._entries = entries
def __len__(self):

View File

@ -3,7 +3,6 @@
import inflect
import re
_inflect = inflect.engine()
_comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])')
_decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)')
@ -56,7 +55,8 @@ def _expand_number(m):
elif num % 100 == 0:
return _inflect.number_to_words(num // 100) + ' hundred'
else:
return _inflect.number_to_words(num, andword='', zero='oh', group=2).replace(', ', ' ')
return _inflect.number_to_words(
num, andword='', zero='oh', group=2).replace(', ', ' ')
else:
return _inflect.number_to_words(num, andword='')
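For context, roughly what these `inflect` calls produce (exact strings may vary by inflect version):

```python
import inflect

p = inflect.engine()
p.number_to_words(1400)             # 'one thousand, four hundred'
p.number_to_words(14) + ' hundred'  # year-style reading: 'fourteen hundred'
# group=2 reads the digits in pairs, so 1906 -> 'nineteen, oh six';
# the .replace(', ', ' ') above then yields 'nineteen oh six'
p.number_to_words(1906, andword='', zero='oh', group=2).replace(', ', ' ')
```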

View File

@ -1,3 +1,16 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''
Defines the set of symbols used in text input to the model.

View File

@ -0,0 +1,13 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

View File

@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from parakeet.models.deepvoice3.encoder import Encoder, ConvSpec
from parakeet.models.deepvoice3.decoder import Decoder, WindowRange
from parakeet.models.deepvoice3.converter import Converter

View File

@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from collections import namedtuple
from paddle import fluid
@ -19,23 +33,19 @@ class Attention(dg.Layer):
value_projection=True):
super(Attention, self).__init__()
std = np.sqrt(1 / query_dim)
self.query_proj = Linear(query_dim,
embed_dim,
param_attr=I.Normal(scale=std))
self.query_proj = Linear(
query_dim, embed_dim, param_attr=I.Normal(scale=std))
if key_projection:
std = np.sqrt(1 / embed_dim)
self.key_proj = Linear(embed_dim,
embed_dim,
param_attr=I.Normal(scale=std))
self.key_proj = Linear(
embed_dim, embed_dim, param_attr=I.Normal(scale=std))
if value_projection:
std = np.sqrt(1 / embed_dim)
self.value_proj = Linear(embed_dim,
embed_dim,
param_attr=I.Normal(scale=std))
self.value_proj = Linear(
embed_dim, embed_dim, param_attr=I.Normal(scale=std))
std = np.sqrt(1 / embed_dim)
self.out_proj = Linear(embed_dim,
query_dim,
param_attr=I.Normal(scale=std))
self.out_proj = Linear(
embed_dim, query_dim, param_attr=I.Normal(scale=std))
self.key_projection = key_projection
self.value_projection = value_projection
@ -102,9 +112,8 @@ class Attention(dg.Layer):
x = F.softmax(x)
attn_scores = x
x = F.dropout(x,
self.dropout,
dropout_implementation="upscale_in_train")
x = F.dropout(
x, self.dropout, dropout_implementation="upscale_in_train")
x = F.matmul(x, values)
encoder_length = keys.shape[1]
# CAUTION: is it wrong? let it be now

View File

@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from paddle import fluid
@ -15,6 +29,7 @@ class Conv1DGLU(dg.Layer):
has a residual connection from the input x, and scales the output by
np.sqrt(0.5).
"""
def __init__(self,
n_speakers,
speaker_dim,
@ -50,20 +65,20 @@ class Conv1DGLU(dg.Layer):
), "this block uses residual connection"\
"the input_channes should equals num_filters"
std = np.sqrt(std_mul * (1 - dropout) / (filter_size * in_channels))
self.conv = Conv1DCell(in_channels,
2 * num_filters,
filter_size,
dilation,
causal,
param_attr=I.Normal(scale=std))
self.conv = Conv1DCell(
in_channels,
2 * num_filters,
filter_size,
dilation,
causal,
param_attr=I.Normal(scale=std))
if n_speakers > 1:
assert (speaker_dim is not None
), "speaker embed should not be null in multi-speaker case"
std = np.sqrt(1 / speaker_dim)
self.fc = Linear(speaker_dim,
num_filters,
param_attr=I.Normal(scale=std))
self.fc = Linear(
speaker_dim, num_filters, param_attr=I.Normal(scale=std))
def forward(self, x, speaker_embed=None):
"""
@ -82,9 +97,8 @@ class Conv1DGLU(dg.Layer):
C_out means the output channels of Conv1DGLU.
"""
residual = x
x = F.dropout(x,
self.dropout,
dropout_implementation="upscale_in_train")
x = F.dropout(
x, self.dropout, dropout_implementation="upscale_in_train")
x = self.conv(x)
content, gate = F.split(x, num_or_sections=2, dim=1)
@ -118,9 +132,8 @@ class Conv1DGLU(dg.Layer):
C_out means the output channels of Conv1DGLU.
"""
residual = x_t
x_t = F.dropout(x_t,
self.dropout,
dropout_implementation="upscale_in_train")
x_t = F.dropout(
x_t, self.dropout, dropout_implementation="upscale_in_train")
x_t = self.conv.add_input(x_t)
content_t, gate_t = F.split(x_t, num_or_sections=2, dim=1)
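The gating itself falls outside this hunk; in plain numpy, the GLU step the docstring above describes looks roughly like this (toy shapes, standard GLU formulation assumed):

```python
import numpy as np

def sigmoid(a):
    return 1. / (1. + np.exp(-a))

x = np.random.randn(4, 2 * 8, 50)        # (B, 2*C, T) out of the gated conv
content, gate = np.split(x, 2, axis=1)   # each (B, C, T)
out = content * sigmoid(gate)            # GLU gating
residual = np.random.randn(4, 8, 50)     # the block's input
out = np.sqrt(0.5) * (out + residual)    # residual + sqrt(0.5) scaling
```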

View File

@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from itertools import chain
@ -19,44 +33,45 @@ def upsampling_4x_blocks(n_speakers, speaker_dim, target_channels, dropout):
2,
stride=2,
param_attr=I.Normal(scale=np.sqrt(1 / (2 * target_channels)))),
Conv1DGLU(n_speakers,
speaker_dim,
target_channels,
target_channels,
3,
dilation=1,
std_mul=1.,
dropout=dropout),
Conv1DGLU(n_speakers,
speaker_dim,
target_channels,
target_channels,
3,
dilation=3,
std_mul=4.,
dropout=dropout),
Conv1DTranspose(
Conv1DGLU(
n_speakers,
speaker_dim,
target_channels,
target_channels,
2,
stride=2,
param_attr=I.Normal(scale=np.sqrt(4. / (2 * target_channels)))),
Conv1DGLU(n_speakers,
speaker_dim,
target_channels,
target_channels,
3,
dilation=1,
std_mul=1.,
dropout=dropout),
Conv1DGLU(n_speakers,
speaker_dim,
target_channels,
target_channels,
3,
dilation=3,
std_mul=4.,
dropout=dropout)
3,
dilation=1,
std_mul=1.,
dropout=dropout), Conv1DGLU(
n_speakers,
speaker_dim,
target_channels,
target_channels,
3,
dilation=3,
std_mul=4.,
dropout=dropout), Conv1DTranspose(
target_channels,
target_channels,
2,
stride=2,
param_attr=I.Normal(scale=np.sqrt(
4. / (2 * target_channels)))), Conv1DGLU(
n_speakers,
speaker_dim,
target_channels,
target_channels,
3,
dilation=1,
std_mul=1.,
dropout=dropout), Conv1DGLU(
n_speakers,
speaker_dim,
target_channels,
target_channels,
3,
dilation=3,
std_mul=4.,
dropout=dropout)
]
return upsampling_convolutions
@ -69,36 +84,38 @@ def upsampling_2x_blocks(n_speakers, speaker_dim, target_channels, dropout):
2,
stride=2,
param_attr=I.Normal(scale=np.sqrt(1. / (2 * target_channels)))),
Conv1DGLU(n_speakers,
speaker_dim,
target_channels,
target_channels,
3,
dilation=1,
std_mul=1.,
dropout=dropout),
Conv1DGLU(n_speakers,
speaker_dim,
target_channels,
target_channels,
3,
dilation=3,
std_mul=4.,
dropout=dropout)
Conv1DGLU(
n_speakers,
speaker_dim,
target_channels,
target_channels,
3,
dilation=1,
std_mul=1.,
dropout=dropout), Conv1DGLU(
n_speakers,
speaker_dim,
target_channels,
target_channels,
3,
dilation=3,
std_mul=4.,
dropout=dropout)
]
return upsampling_convolutions
def upsampling_1x_blocks(n_speakers, speaker_dim, target_channels, dropout):
upsampling_convolutions = [
Conv1DGLU(n_speakers,
speaker_dim,
target_channels,
target_channels,
3,
dilation=3,
std_mul=4.,
dropout=dropout)
Conv1DGLU(
n_speakers,
speaker_dim,
target_channels,
target_channels,
3,
dilation=3,
std_mul=4.,
dropout=dropout)
]
return upsampling_convolutions
@ -108,6 +125,7 @@ class Converter(dg.Layer):
Vocoder that transforms mel spectrograms (or decoder hidden states)
to waveform.
"""
def __init__(self,
n_speakers,
speaker_dim,
@ -161,33 +179,36 @@ class Converter(dg.Layer):
std = np.sqrt(std_mul / in_channels)
# CAUTION: relu
self.convolutions.append(
Conv1D(in_channels,
out_channels,
1,
act="relu",
param_attr=I.Normal(scale=std)))
Conv1D(
in_channels,
out_channels,
1,
act="relu",
param_attr=I.Normal(scale=std)))
in_channels = out_channels
std_mul = 2.0
self.convolutions.append(
Conv1DGLU(n_speakers,
speaker_dim,
in_channels,
out_channels,
filter_size,
dilation=dilation,
std_mul=std_mul,
dropout=dropout))
Conv1DGLU(
n_speakers,
speaker_dim,
in_channels,
out_channels,
filter_size,
dilation=dilation,
std_mul=std_mul,
dropout=dropout))
in_channels = out_channels
std_mul = 4.0
# final conv proj, channel transformed to linear dim
std = np.sqrt(std_mul * (1 - dropout) / in_channels)
# CAUTION: sigmoid
self.last_conv_proj = Conv1D(in_channels,
linear_dim,
1,
act="sigmoid",
param_attr=I.Normal(scale=std))
self.last_conv_proj = Conv1D(
in_channels,
linear_dim,
1,
act="sigmoid",
param_attr=I.Normal(scale=std))
def forward(self, x, speaker_embed=None):
"""

View File

@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle.fluid.layers as F
import paddle.fluid.initializer as I
@ -80,25 +94,25 @@ def unfold_adjacent_frames(folded_frames, r):
class Decoder(dg.Layer):
def __init__(
self,
n_speakers,
speaker_dim,
embed_dim,
mel_dim,
r=1,
max_positions=512,
padding_idx=None, # remove it!
preattention=(ConvSpec(128, 5, 1), ) * 4,
convolutions=(ConvSpec(128, 5, 1), ) * 4,
attention=True,
dropout=0.0,
use_memory_mask=False,
force_monotonic_attention=False,
query_position_rate=1.0,
key_position_rate=1.0,
window_range=WindowRange(-1, 3),
key_projection=True,
value_projection=True):
self,
n_speakers,
speaker_dim,
embed_dim,
mel_dim,
r=1,
max_positions=512,
padding_idx=None, # remove it!
preattention=(ConvSpec(128, 5, 1), ) * 4,
convolutions=(ConvSpec(128, 5, 1), ) * 4,
attention=True,
dropout=0.0,
use_memory_mask=False,
force_monotonic_attention=False,
query_position_rate=1.0,
key_position_rate=1.0,
window_range=WindowRange(-1, 3),
key_projection=True,
value_projection=True):
super(Decoder, self).__init__()
self.dropout = dropout
@ -111,23 +125,17 @@ class Decoder(dg.Layer):
conv_channels = convolutions[0].out_channels
# only when padding_idx is 0 can we easily handle it
self.embed_keys_positions = PositionEmbedding(max_positions,
embed_dim,
padding_idx=0)
self.embed_query_positions = PositionEmbedding(max_positions,
conv_channels,
padding_idx=0)
self.embed_keys_positions = PositionEmbedding(
max_positions, embed_dim, padding_idx=0)
self.embed_query_positions = PositionEmbedding(
max_positions, conv_channels, padding_idx=0)
if n_speakers > 1:
std = np.sqrt((1 - dropout) / speaker_dim)
self.speaker_proj1 = Linear(speaker_dim,
1,
act="sigmoid",
param_attr=I.Normal(scale=std))
self.speaker_proj2 = Linear(speaker_dim,
1,
act="sigmoid",
param_attr=I.Normal(scale=std))
self.speaker_proj1 = Linear(
speaker_dim, 1, act="sigmoid", param_attr=I.Normal(scale=std))
self.speaker_proj2 = Linear(
speaker_dim, 1, act="sigmoid", param_attr=I.Normal(scale=std))
# prenet
self.prenet = dg.LayerList()
@ -138,24 +146,26 @@ class Decoder(dg.Layer):
# conv1d & relu
std = np.sqrt(std_mul / in_channels)
self.prenet.append(
Conv1D(in_channels,
out_channels,
1,
act="relu",
param_attr=I.Normal(scale=std)))
Conv1D(
in_channels,
out_channels,
1,
act="relu",
param_attr=I.Normal(scale=std)))
in_channels = out_channels
std_mul = 2.0
self.prenet.append(
Conv1DGLU(n_speakers,
speaker_dim,
in_channels,
out_channels,
filter_size,
dilation,
std_mul,
dropout,
causal=True,
residual=True))
Conv1DGLU(
n_speakers,
speaker_dim,
in_channels,
out_channels,
filter_size,
dilation,
std_mul,
dropout,
causal=True,
residual=True))
in_channels = out_channels
std_mul = 4.0
@ -184,16 +194,17 @@ class Decoder(dg.Layer):
assert (
in_channels == out_channels
), "the stack of convolution & attention does not change channels"
conv_layer = Conv1DGLU(n_speakers,
speaker_dim,
in_channels,
out_channels,
filter_size,
dilation,
std_mul,
dropout,
causal=True,
residual=False)
conv_layer = Conv1DGLU(
n_speakers,
speaker_dim,
in_channels,
out_channels,
filter_size,
dilation,
std_mul,
dropout,
causal=True,
residual=False)
attn_layer = Attention(
out_channels,
embed_dim,
@ -211,10 +222,8 @@ class Decoder(dg.Layer):
# 1 * 1 conv to transform channels
std = np.sqrt(std_mul * (1 - dropout) / in_channels)
self.last_conv = Conv1D(in_channels,
mel_dim * r,
1,
param_attr=I.Normal(scale=std))
self.last_conv = Conv1D(
in_channels, mel_dim * r, 1, param_attr=I.Normal(scale=std))
# mel (before sigmoid) to done hat
std = np.sqrt(1 / in_channels)
@ -308,9 +317,8 @@ class Decoder(dg.Layer):
# (B, C, T)
frames = F.transpose(frames, [0, 2, 1])
x = frames
x = F.dropout(x,
self.dropout,
dropout_implementation="upscale_in_train")
x = F.dropout(
x, self.dropout, dropout_implementation="upscale_in_train")
# Prenet
for layer in self.prenet:
if isinstance(layer, Conv1DGLU):
@ -408,14 +416,13 @@ class Decoder(dg.Layer):
test_inputs = fold_adjacent_frames(test_inputs, self.r)
test_inputs = F.transpose(test_inputs, [0, 2, 1])
initial_input = F.zeros((batch_size, self.mel_dim * self.r, 1),
dtype=keys.dtype)
initial_input = F.zeros(
(batch_size, self.mel_dim * self.r, 1), dtype=keys.dtype)
t = 0 # decoder time step
while True:
frame_pos = F.fill_constant((batch_size, 1),
value=t + 1,
dtype="int64")
frame_pos = F.fill_constant(
(batch_size, 1), value=t + 1, dtype="int64")
w = self.query_position_rate
if self.n_speakers > 1:
w = w * F.squeeze(self.speaker_proj2(speaker_embed), [-1])
@ -433,9 +440,8 @@ class Decoder(dg.Layer):
current_input = initial_input
x_t = current_input
x_t = F.dropout(x_t,
self.dropout,
dropout_implementation="upscale_in_train")
x_t = F.dropout(
x_t, self.dropout, dropout_implementation="upscale_in_train")
# Prenet
for layer in self.prenet:
@ -453,15 +459,15 @@ class Decoder(dg.Layer):
x_t = F.transpose(x_t, [0, 2, 1])
if frame_pos_embed is not None:
x_t += frame_pos_embed
x_t, attn_scores = attn(
x_t, (keys, values), mask,
last_attended[i] if test_inputs is None else None)
x_t, attn_scores = attn(x_t, (keys, values), mask,
last_attended[i]
if test_inputs is None else None)
x_t = F.transpose(x_t, [0, 2, 1])
step_attn_scores.append(attn_scores) #(B, T_dec=1, T_enc)
# update last attended when necessary
if self.force_monotonic_attention[i]:
last_attended[i] = np.argmax(attn_scores.numpy(),
axis=-1)[0][0]
last_attended[i] = np.argmax(
attn_scores.numpy(), axis=-1)[0][0]
x_t = F.scale(residual + x_t, np.sqrt(0.5))
if len(step_attn_scores):
# (B, 1, T_enc) again
@ -485,8 +491,8 @@ class Decoder(dg.Layer):
t += 1
if test_inputs is None:
if F.reduce_min(done_t).numpy(
)[0] > 0.5 and t > self.min_decoder_steps:
if F.reduce_min(done_t).numpy()[
0] > 0.5 and t > self.min_decoder_steps:
break
elif t > self.max_decoder_steps:
break

View File

@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from collections import namedtuple
@ -33,14 +47,16 @@ class Encoder(dg.Layer):
self.dropout = dropout
if n_speakers > 1:
std = np.sqrt((1 - dropout) / speaker_dim)
self.sp_proj1 = Linear(speaker_dim,
embed_dim,
act="softsign",
param_attr=I.Normal(scale=std))
self.sp_proj2 = Linear(speaker_dim,
embed_dim,
act="softsign",
param_attr=I.Normal(scale=std))
self.sp_proj1 = Linear(
speaker_dim,
embed_dim,
act="softsign",
param_attr=I.Normal(scale=std))
self.sp_proj2 = Linear(
speaker_dim,
embed_dim,
act="softsign",
param_attr=I.Normal(scale=std))
self.n_speakers = n_speakers
self.convolutions = dg.LayerList()
@ -51,31 +67,34 @@ class Encoder(dg.Layer):
if in_channels != out_channels:
std = np.sqrt(std_mul / in_channels)
self.convolutions.append(
Conv1D(in_channels,
out_channels,
1,
act="relu",
param_attr=I.Normal(scale=std)))
Conv1D(
in_channels,
out_channels,
1,
act="relu",
param_attr=I.Normal(scale=std)))
in_channels = out_channels
std_mul = 2.0
self.convolutions.append(
Conv1DGLU(n_speakers,
speaker_dim,
in_channels,
out_channels,
filter_size,
dilation,
std_mul,
dropout,
causal=False,
residual=True))
Conv1DGLU(
n_speakers,
speaker_dim,
in_channels,
out_channels,
filter_size,
dilation,
std_mul,
dropout,
causal=False,
residual=True))
in_channels = out_channels
std_mul = 4.0
std = np.sqrt(std_mul * (1 - dropout) / in_channels)
self.convolutions.append(
Conv1D(in_channels, embed_dim, 1, param_attr=I.Normal(scale=std)))
Conv1D(
in_channels, embed_dim, 1, param_attr=I.Normal(scale=std)))
def forward(self, x, speaker_embed=None):
"""
@ -96,9 +115,8 @@ class Encoder(dg.Layer):
representation for values.
"""
x = self.embed(x)
x = F.dropout(x,
self.dropout,
dropout_implementation="upscale_in_train")
x = F.dropout(
x, self.dropout, dropout_implementation="upscale_in_train")
x = F.transpose(x, [0, 2, 1])
if self.n_speakers > 1 and speaker_embed is not None:

View File

@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from numba import jit
@ -31,9 +45,7 @@ def guided_attention(N, max_N, T, max_T, g):
return W
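# NOTE: the loop body is elided from this hunk; the weights it fills are
# presumably the usual guided-attention matrix (Tachibana et al.):
#     W[n, t] = 1 - exp(-((n / N - t / T) ** 2) / (2 * g ** 2))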
def guided_attentions(encoder_lengths,
decoder_lengths,
max_decoder_len,
def guided_attentions(encoder_lengths, decoder_lengths, max_decoder_len,
g=0.2):
B = len(encoder_lengths)
max_input_len = encoder_lengths.max()
@ -93,9 +105,8 @@ class TTSLoss(object):
def binary_divergence(self, prediction, target, mask):
flattened_prediction = F.reshape(prediction, [-1, 1])
flattened_target = F.reshape(target, [-1, 1])
flattened_loss = F.log_loss(flattened_prediction,
flattened_target,
epsilon=1e-8)
flattened_loss = F.log_loss(
flattened_prediction, flattened_target, epsilon=1e-8)
bin_div = fluid.layers.reshape(flattened_loss, prediction.shape)
w = self.masked_weight
@ -163,23 +174,20 @@ class TTSLoss(object):
max_mel_steps = max_frames // self.downsample_factor
max_decoder_steps = max_mel_steps // self.r
decoder_mask = F.sequence_mask(n_frames // self.downsample_factor //
self.r,
max_decoder_steps,
dtype="float32")
mel_mask = F.sequence_mask(n_frames // self.downsample_factor,
max_mel_steps,
dtype="float32")
decoder_mask = F.sequence_mask(
n_frames // self.downsample_factor // self.r,
max_decoder_steps,
dtype="float32")
mel_mask = F.sequence_mask(
n_frames // self.downsample_factor, max_mel_steps, dtype="float32")
lin_mask = F.sequence_mask(n_frames, max_frames, dtype="float32")
if compute_lin_loss:
lin_hyp = lin_hyp[:, :-self.time_shift, :]
lin_ref = lin_ref[:, self.time_shift:, :]
lin_mask = lin_mask[:, self.time_shift:, :]
lin_l1_loss = self.l1_loss(lin_hyp,
lin_ref,
lin_mask,
priority_bin=self.priority_bin)
lin_l1_loss = self.l1_loss(
lin_hyp, lin_ref, lin_mask, priority_bin=self.priority_bin)
lin_bce_loss = self.binary_divergence(lin_hyp, lin_ref, lin_mask)
lin_loss = self.binary_divergence_weight * lin_bce_loss \
+ (1 - self.binary_divergence_weight) * lin_l1_loss
@ -197,9 +205,10 @@ class TTSLoss(object):
total_loss += mel_loss
if compute_attn_loss:
attn_loss = self.attention_loss(
attn_hyp, input_lengths.numpy(),
n_frames.numpy() // (self.downsample_factor * self.r))
attn_loss = self.attention_loss(attn_hyp,
input_lengths.numpy(),
n_frames.numpy() //
(self.downsample_factor * self.r))
total_loss += attn_loss
if compute_done_loss:

View File

@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle.fluid.layers as F
@ -29,9 +43,9 @@ class DeepVoice3(dg.Layer):
mel_outputs, alignments, done, decoder_states = self.decoder(
(keys, values), valid_lengths, mel_inputs, text_positions,
frame_positions, speaker_embed)
linear_outputs = self.converter(
decoder_states if self.use_decoder_states else mel_outputs,
speaker_embed)
linear_outputs = self.converter(decoder_states
if self.use_decoder_states else
mel_outputs, speaker_embed)
return mel_outputs, linear_outputs, alignments, done
def transduce(self, text_sequences, text_positions, speaker_indices=None):
@ -43,7 +57,7 @@ class DeepVoice3(dg.Layer):
keys, values = self.encoder(text_sequences, speaker_embed)
mel_outputs, alignments, done, decoder_states = self.decoder.decode(
(keys, values), text_positions, speaker_embed)
linear_outputs = self.converter(
decoder_states if self.use_decoder_states else mel_outputs,
speaker_embed)
linear_outputs = self.converter(decoder_states
if self.use_decoder_states else
mel_outputs, speaker_embed)
return mel_outputs, linear_outputs, alignments, done

View File

@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from paddle import fluid
import paddle.fluid.layers as F
@ -95,8 +109,9 @@ class PositionEmbedding(dg.Layer):
speaker_position_rate) # (B, V, C)
# make indices for gather_nd
batch_id = F.expand(
F.unsqueeze(F.range(0, batch_size, 1, dtype="int64"), [1]),
[1, time_steps])
F.unsqueeze(
F.range(
0, batch_size, 1, dtype="int64"), [1]), [1, time_steps])
# (B, T, 2)
gather_nd_id = F.stack([batch_id, indices], -1)

View File

@ -0,0 +1,13 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

View File

@ -1,8 +1,22 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
from parakeet.models.transformer_tts.utils import *
from parakeet.models.fastspeech.fft_block import FFTBlock
class Decoder(dg.Layer):
def __init__(self,
len_max_seq,
@ -18,13 +32,26 @@ class Decoder(dg.Layer):
super(Decoder, self).__init__()
n_position = len_max_seq + 1
self.pos_inp = get_sinusoid_encoding_table(n_position, d_model, padding_idx=0)
self.position_enc = dg.Embedding(size=[n_position, d_model],
padding_idx=0,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(self.pos_inp),
trainable=False))
self.layer_stack = [FFTBlock(d_model, d_inner, n_head, d_k, d_v, fft_conv1d_kernel, fft_conv1d_padding, dropout=dropout) for _ in range(n_layers)]
self.pos_inp = get_sinusoid_encoding_table(
n_position, d_model, padding_idx=0)
self.position_enc = dg.Embedding(
size=[n_position, d_model],
padding_idx=0,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(
self.pos_inp),
trainable=False))
self.layer_stack = [
FFTBlock(
d_model,
d_inner,
n_head,
d_k,
d_v,
fft_conv1d_kernel,
fft_conv1d_padding,
dropout=dropout) for _ in range(n_layers)
]
for i, layer in enumerate(self.layer_stack):
self.add_sublayer('fft_{}'.format(i), layer)
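
The `add_sublayer` loop matters because layers kept only in a Python list are invisible to dygraph's parameter tracking. A minimal sketch of the registration pattern (the `Stack` class is illustrative):

```python
import paddle.fluid.dygraph as dg

class Stack(dg.Layer):
    def __init__(self, n_layers=3, hidden=8):
        super(Stack, self).__init__()
        self.blocks = [dg.Linear(hidden, hidden) for _ in range(n_layers)]
        for i, block in enumerate(self.blocks):
            # registration exposes the parameters to optimizers and state dicts
            self.add_sublayer("block_{}".format(i), block)
```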

View File

@ -1,8 +1,22 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
from parakeet.models.transformer_tts.utils import *
from parakeet.models.fastspeech.fft_block import FFTBlock
class Encoder(dg.Layer):
def __init__(self,
n_src_vocab,
@ -19,14 +33,28 @@ class Encoder(dg.Layer):
super(Encoder, self).__init__()
n_position = len_max_seq + 1
self.src_word_emb = dg.Embedding(size=[n_src_vocab, d_model], padding_idx=0)
self.pos_inp = get_sinusoid_encoding_table(n_position, d_model, padding_idx=0)
self.position_enc = dg.Embedding(size=[n_position, d_model],
padding_idx=0,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(self.pos_inp),
trainable=False))
self.layer_stack = [FFTBlock(d_model, d_inner, n_head, d_k, d_v, fft_conv1d_kernel, fft_conv1d_padding, dropout=dropout) for _ in range(n_layers)]
self.src_word_emb = dg.Embedding(
size=[n_src_vocab, d_model], padding_idx=0)
self.pos_inp = get_sinusoid_encoding_table(
n_position, d_model, padding_idx=0)
self.position_enc = dg.Embedding(
size=[n_position, d_model],
padding_idx=0,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(
self.pos_inp),
trainable=False))
self.layer_stack = [
FFTBlock(
d_model,
d_inner,
n_head,
d_k,
d_v,
fft_conv1d_kernel,
fft_conv1d_padding,
dropout=dropout) for _ in range(n_layers)
]
for i, layer in enumerate(self.layer_stack):
self.add_sublayer('fft_{}'.format(i), layer)
@ -52,7 +80,8 @@ class Encoder(dg.Layer):
non_pad_mask = get_non_pad_mask(character)
# -- Forward
enc_output = self.src_word_emb(character) + self.position_enc(text_pos) #(N, T, C)
enc_output = self.src_word_emb(character) + self.position_enc(
text_pos) #(N, T, C)
for enc_layer in self.layer_stack:
enc_output, enc_slf_attn = enc_layer(

View File

@ -1,3 +1,16 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
@ -7,54 +20,67 @@ from parakeet.models.fastspeech.length_regulator import LengthRegulator
from parakeet.models.fastspeech.encoder import Encoder
from parakeet.models.fastspeech.decoder import Decoder
class FastSpeech(dg.Layer):
def __init__(self, cfg):
" FastSpeech"
super(FastSpeech, self).__init__()
self.encoder = Encoder(n_src_vocab=len(symbols)+1,
len_max_seq=cfg['max_seq_len'],
n_layers=cfg['encoder_n_layer'],
n_head=cfg['encoder_head'],
d_k=cfg['fs_hidden_size'] // cfg['encoder_head'],
d_v=cfg['fs_hidden_size'] // cfg['encoder_head'],
d_model=cfg['fs_hidden_size'],
d_inner=cfg['encoder_conv1d_filter_size'],
fft_conv1d_kernel=cfg['fft_conv1d_filter'],
fft_conv1d_padding=cfg['fft_conv1d_padding'],
dropout=0.1)
self.length_regulator = LengthRegulator(input_size=cfg['fs_hidden_size'],
out_channels=cfg['duration_predictor_output_size'],
filter_size=cfg['duration_predictor_filter_size'],
dropout=cfg['dropout'])
self.decoder = Decoder(len_max_seq=cfg['max_seq_len'],
n_layers=cfg['decoder_n_layer'],
n_head=cfg['decoder_head'],
d_k=cfg['fs_hidden_size'] // cfg['decoder_head'],
d_v=cfg['fs_hidden_size'] // cfg['decoder_head'],
d_model=cfg['fs_hidden_size'],
d_inner=cfg['decoder_conv1d_filter_size'],
fft_conv1d_kernel=cfg['fft_conv1d_filter'],
fft_conv1d_padding=cfg['fft_conv1d_padding'],
dropout=0.1)
self.weight = fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer())
self.encoder = Encoder(
n_src_vocab=len(symbols) + 1,
len_max_seq=cfg['max_seq_len'],
n_layers=cfg['encoder_n_layer'],
n_head=cfg['encoder_head'],
d_k=cfg['fs_hidden_size'] // cfg['encoder_head'],
d_v=cfg['fs_hidden_size'] // cfg['encoder_head'],
d_model=cfg['fs_hidden_size'],
d_inner=cfg['encoder_conv1d_filter_size'],
fft_conv1d_kernel=cfg['fft_conv1d_filter'],
fft_conv1d_padding=cfg['fft_conv1d_padding'],
dropout=0.1)
self.length_regulator = LengthRegulator(
input_size=cfg['fs_hidden_size'],
out_channels=cfg['duration_predictor_output_size'],
filter_size=cfg['duration_predictor_filter_size'],
dropout=cfg['dropout'])
self.decoder = Decoder(
len_max_seq=cfg['max_seq_len'],
n_layers=cfg['decoder_n_layer'],
n_head=cfg['decoder_head'],
d_k=cfg['fs_hidden_size'] // cfg['decoder_head'],
d_v=cfg['fs_hidden_size'] // cfg['decoder_head'],
d_model=cfg['fs_hidden_size'],
d_inner=cfg['decoder_conv1d_filter_size'],
fft_conv1d_kernel=cfg['fft_conv1d_filter'],
fft_conv1d_padding=cfg['fft_conv1d_padding'],
dropout=0.1)
self.weight = fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer())
k = math.sqrt(1 / cfg['fs_hidden_size'])
self.bias = fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k))
self.mel_linear = dg.Linear(cfg['fs_hidden_size'],
cfg['audio']['num_mels']* cfg['audio']['outputs_per_step'],
param_attr = self.weight,
bias_attr = self.bias,)
self.postnet = PostConvNet(n_mels=cfg['audio']['num_mels'],
num_hidden=512,
filter_size=5,
padding=int(5 / 2),
num_conv=5,
outputs_per_step=cfg['audio']['outputs_per_step'],
use_cudnn=True,
dropout=0.1,
batchnorm_last=True)
self.bias = fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-k, high=k))
self.mel_linear = dg.Linear(
cfg['fs_hidden_size'],
cfg['audio']['num_mels'] * cfg['audio']['outputs_per_step'],
param_attr=self.weight,
bias_attr=self.bias, )
self.postnet = PostConvNet(
n_mels=cfg['audio']['num_mels'],
num_hidden=512,
filter_size=5,
padding=int(5 / 2),
num_conv=5,
outputs_per_step=cfg['audio']['outputs_per_step'],
use_cudnn=True,
dropout=0.1,
batchnorm_last=True)
def forward(self, character, text_pos, mel_pos=None, length_target=None, alpha=1.0):
def forward(self,
character,
text_pos,
mel_pos=None,
length_target=None,
alpha=1.0):
"""
FastSpeech model.
@ -80,21 +106,24 @@ class FastSpeech(dg.Layer):
dec_slf_attn_list (Variable), Shape(B, mel_T, mel_T), the decoder self attention list.
"""
encoder_output, non_pad_mask, enc_slf_attn_list = self.encoder(character, text_pos)
encoder_output, non_pad_mask, enc_slf_attn_list = self.encoder(
character, text_pos)
if fluid.framework._dygraph_tracer()._train_mode:
length_regulator_output, duration_predictor_output = self.length_regulator(encoder_output,
target=length_target,
alpha=alpha)
decoder_output, dec_slf_attn_list = self.decoder(length_regulator_output, mel_pos)
length_regulator_output, duration_predictor_output = self.length_regulator(
encoder_output, target=length_target, alpha=alpha)
decoder_output, dec_slf_attn_list = self.decoder(
length_regulator_output, mel_pos)
mel_output = self.mel_linear(decoder_output)
mel_output_postnet = self.postnet(mel_output) + mel_output
return mel_output, mel_output_postnet, duration_predictor_output, enc_slf_attn_list, dec_slf_attn_list
else:
length_regulator_output, decoder_pos = self.length_regulator(encoder_output, alpha=alpha)
decoder_output, _ = self.decoder(length_regulator_output, decoder_pos)
length_regulator_output, decoder_pos = self.length_regulator(
encoder_output, alpha=alpha)
decoder_output, _ = self.decoder(length_regulator_output,
decoder_pos)
mel_output = self.mel_linear(decoder_output)
mel_output_postnet = self.postnet(mel_output) + mel_output
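
The forward pass above branches on the dygraph tracer's train mode: training consumes ground-truth durations via `length_target`, while inference expands frames by predicted durations scaled by `alpha`. A hedged inference-side sketch; the constructed `model`, input shapes, eval-mode setup, and the two-value return are assumptions following the else branch:

```python
import numpy as np
import paddle.fluid.dygraph as dg

# assumes a constructed FastSpeech `model` inside a dg.guard(), with the
# tracer in eval mode so the inference branch above is taken
character = dg.to_variable(np.array([[4, 9, 13]], dtype="int64"))  # (B, T_text)
text_pos = dg.to_variable(np.array([[1, 2, 3]], dtype="int64"))    # 1-based positions
# alpha > 1 lengthens predicted durations (slower speech); alpha < 1 shortens them
mel, mel_postnet = model(character, text_pos, alpha=1.3)
```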

View File

@ -1,3 +1,16 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import math
import paddle.fluid.dygraph as dg
@ -6,11 +19,32 @@ import paddle.fluid as fluid
from parakeet.modules.multihead_attention import MultiheadAttention
from parakeet.modules.ffn import PositionwiseFeedForward
class FFTBlock(dg.Layer):
def __init__(self, d_model, d_inner, n_head, d_k, d_v, filter_size, padding, dropout=0.2):
def __init__(self,
d_model,
d_inner,
n_head,
d_k,
d_v,
filter_size,
padding,
dropout=0.2):
super(FFTBlock, self).__init__()
self.slf_attn = MultiheadAttention(d_model, d_k, d_v, num_head=n_head, is_bias=True, dropout=dropout, is_concat=False)
self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, filter_size =filter_size, padding =padding, dropout=dropout)
self.slf_attn = MultiheadAttention(
d_model,
d_k,
d_v,
num_head=n_head,
is_bias=True,
dropout=dropout,
is_concat=False)
self.pos_ffn = PositionwiseFeedForward(
d_model,
d_inner,
filter_size=filter_size,
padding=padding,
dropout=dropout)
def forward(self, enc_input, non_pad_mask=None, slf_attn_mask=None):
"""
@ -27,7 +61,8 @@ class FFTBlock(dg.Layer):
output (Variable), Shape(B, T, C), the output after self-attention & ffn.
slf_attn (Variable), Shape(B * n_head, T, T), the self attention.
"""
output, slf_attn = self.slf_attn(enc_input, enc_input, enc_input, mask=slf_attn_mask)
output, slf_attn = self.slf_attn(
enc_input, enc_input, enc_input, mask=slf_attn_mask)
output *= non_pad_mask
output = self.pos_ffn(output)
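
Multiplying by `non_pad_mask` zeroes the features of padded time steps after each sub-layer. A numpy illustration with toy shapes assumed:

```python
import numpy as np

output = np.ones((1, 4, 2), dtype="float32")         # (B, T, C)
non_pad_mask = np.array([[[1.], [1.], [1.], [0.]]])  # last step is padding
print(output * non_pad_mask)                         # padded step becomes zeros
```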

View File

@ -1,3 +1,16 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import math
import parakeet.models.fastspeech.utils
@ -6,19 +19,23 @@ import paddle.fluid.layers as layers
import paddle.fluid as fluid
from parakeet.modules.customized import Conv1D
class LengthRegulator(dg.Layer):
def __init__(self, input_size, out_channels, filter_size, dropout=0.1):
super(LengthRegulator, self).__init__()
self.duration_predictor = DurationPredictor(input_size=input_size,
out_channels=out_channels,
filter_size=filter_size,
dropout=dropout)
self.duration_predictor = DurationPredictor(
input_size=input_size,
out_channels=out_channels,
filter_size=filter_size,
dropout=dropout)
def LR(self, x, duration_predictor_output, alpha=1.0):
output = []
batch_size = x.shape[0]
for i in range(batch_size):
output.append(self.expand(x[i:i+1], duration_predictor_output[i:i+1], alpha))
output.append(
self.expand(x[i:i + 1], duration_predictor_output[i:i + 1],
alpha))
output = self.pad(output)
return output
@ -27,8 +44,8 @@ class LengthRegulator(dg.Layer):
out_list = []
for i in range(len(input_ele)):
pad_len = max_len - input_ele[i].shape[0]
one_batch_padded = layers.pad(
input_ele[i], [0, pad_len, 0, 0], pad_value=0.0)
one_batch_padded = layers.pad(input_ele[i], [0, pad_len, 0, 0],
pad_value=0.0)
out_list.append(one_batch_padded)
out_padded = layers.stack(out_list)
return out_padded
@ -37,17 +54,16 @@ class LengthRegulator(dg.Layer):
out = []
time_steps = batch.shape[1]
fertilities = predicted.numpy()
batch = layers.squeeze(batch,[0])
batch = layers.squeeze(batch, [0])
for i in range(time_steps):
if fertilities[0,i]==0:
if fertilities[0, i] == 0:
continue
out.append(layers.expand(batch[i: i + 1, :], [int(fertilities[0,i]), 1]))
out.append(
layers.expand(batch[i:i + 1, :], [int(fertilities[0, i]), 1]))
out = layers.concat(out, axis=0)
return out
def forward(self, x, alpha=1.0, target=None):
"""
Length Regulator block in FastSpeech.
@ -70,10 +86,11 @@ class LengthRegulator(dg.Layer):
else:
duration_predictor_output = layers.round(duration_predictor_output)
output = self.LR(x, duration_predictor_output, alpha)
mel_pos = dg.to_variable(np.arange(1, output.shape[1]+1))
mel_pos = dg.to_variable(np.arange(1, output.shape[1] + 1))
mel_pos = layers.unsqueeze(mel_pos, [0])
return output, mel_pos
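
`LR`/`expand` above repeat each encoder frame by its integer duration and skip zero-duration tokens. The same expansion in plain numpy, with toy values assumed:

```python
import numpy as np

encoder_output = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]])  # (T_text, C)
durations = np.array([2, 0, 3])  # frames per token; zeros are skipped
expanded = np.concatenate(
    [np.tile(frame[None, :], [int(d), 1])
     for frame, d in zip(encoder_output, durations) if d > 0],
    axis=0)
print(expanded.shape)  # (5, 2): two copies of frame 0, three of frame 2
```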
class DurationPredictor(dg.Layer):
def __init__(self, input_size, out_channels, filter_size, dropout=0.1):
super(DurationPredictor, self).__init__()
@ -83,30 +100,38 @@ class DurationPredictor(dg.Layer):
self.dropout = dropout
k = math.sqrt(1 / self.input_size)
self.conv1 = Conv1D(num_channels = self.input_size,
num_filters = self.out_channels,
filter_size = self.filter_size,
padding=1,
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k)))
#data_format='NTC')
self.conv1 = Conv1D(
num_channels=self.input_size,
num_filters=self.out_channels,
filter_size=self.filter_size,
padding=1,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-k, high=k)))
#data_format='NTC')
k = math.sqrt(1 / self.out_channels)
self.conv2 = Conv1D(num_channels = self.out_channels,
num_filters = self.out_channels,
filter_size = self.filter_size,
padding=1,
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k)))
#data_format='NTC')
self.conv2 = Conv1D(
num_channels=self.out_channels,
num_filters=self.out_channels,
filter_size=self.filter_size,
padding=1,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-k, high=k)))
#data_format='NTC')
self.layer_norm1 = dg.LayerNorm(self.out_channels)
self.layer_norm2 = dg.LayerNorm(self.out_channels)
self.weight = fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer())
self.weight = fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer())
k = math.sqrt(1 / self.out_channels)
self.bias = fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k))
self.bias = fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-k, high=k))
self.linear =dg.Linear(self.out_channels, 1, param_attr = self.weight,
bias_attr = self.bias)
self.linear = dg.Linear(
self.out_channels, 1, param_attr=self.weight, bias_attr=self.bias)
def forward(self, encoder_output):
"""
@ -118,18 +143,15 @@ class DurationPredictor(dg.Layer):
out (Variable), Shape(B, T, C), the output of duration predictor.
"""
# encoder_output.shape(N, T, C)
out = layers.transpose(encoder_output, [0,2,1])
out = layers.transpose(encoder_output, [0, 2, 1])
out = self.conv1(out)
out = layers.transpose(out, [0,2,1])
out = layers.transpose(out, [0, 2, 1])
out = layers.dropout(layers.relu(self.layer_norm1(out)), self.dropout)
out = layers.transpose(out, [0,2,1])
out = layers.transpose(out, [0, 2, 1])
out = self.conv2(out)
out = layers.transpose(out, [0,2,1])
out = layers.transpose(out, [0, 2, 1])
out = layers.dropout(layers.relu(self.layer_norm2(out)), self.dropout)
out = layers.relu(self.linear(out))
out = layers.squeeze(out, axes=[-1])
return out
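
The transposes around each convolution exist because `Conv1D` here runs in channel-first (N, C, T) layout while `LayerNorm` and `Linear` expect (N, T, C). The round trip at a glance:

```python
import numpy as np

x = np.zeros((8, 50, 256))          # (N, T, C) as produced upstream
x_nct = np.transpose(x, (0, 2, 1))  # (N, C, T) for the 1-D convolution
x_roundtrip = np.transpose(x_nct, (0, 2, 1))
assert x_roundtrip.shape == x.shape
```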

View File

@ -1,5 +1,19 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
def get_alignment(attn_probs, mel_lens, n_head):
max_F = 0
assert attn_probs[0].shape[0] % n_head == 0
@ -8,7 +22,7 @@ def get_alignment(attn_probs, mel_lens, n_head):
for i in range(len(attn_probs)):
multi_attn = attn_probs[i].numpy()
for j in range(n_head):
attn = multi_attn[j*batch_size:(j+1)*batch_size]
attn = multi_attn[j * batch_size:(j + 1) * batch_size]
F = score_F(attn)
if max_F < F:
max_F = F
@ -16,19 +30,19 @@ def get_alignment(attn_probs, mel_lens, n_head):
alignment = compute_duration(max_attn, mel_lens)
return alignment
def score_F(attn):
max = np.max(attn, axis=-1)
mean = np.mean(max)
return mean
def compute_duration(attn, mel_lens):
alignment = np.zeros([attn.shape[0],attn.shape[2]])
alignment = np.zeros([attn.shape[0], attn.shape[2]])
mel_lens = mel_lens.numpy()
for i in range(attn.shape[0]):
for j in range(mel_lens[i]):
max_index = np.argmax(attn[i,j])
alignment[i,max_index] += 1
max_index = np.argmax(attn[i, j])
alignment[i, max_index] += 1
return alignment
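
`compute_duration` counts, per encoder position, how many decoder frames give it the attention argmax; those counts become the durations. A tiny worked example:

```python
import numpy as np

# one example: 4 decoder frames attending over 3 encoder positions
attn = np.array([[[0.9, 0.1, 0.0],
                  [0.7, 0.2, 0.1],
                  [0.1, 0.8, 0.1],
                  [0.0, 0.2, 0.8]]])
alignment = np.zeros((1, 3))
for j in range(4):  # mel_lens[0] == 4 here
    alignment[0, np.argmax(attn[0, j])] += 1
print(alignment)  # [[2. 1. 1.]] -> durations per encoder position
```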

View File

@ -0,0 +1,13 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

View File

@ -1,3 +1,16 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
from parakeet.g2p.text.symbols import symbols
import paddle.fluid.dygraph as dg
@ -7,9 +20,16 @@ from parakeet.modules.customized import Pool1D, Conv1D
from parakeet.modules.dynamic_gru import DynamicGRU
import numpy as np
class CBHG(dg.Layer):
def __init__(self, hidden_size, batch_size, K=16, projection_size = 256, num_gru_layers=2,
max_pool_kernel_size=2, is_post=False):
def __init__(self,
hidden_size,
batch_size,
K=16,
projection_size=256,
num_gru_layers=2,
max_pool_kernel_size=2,
is_post=False):
super(CBHG, self).__init__()
"""
:param hidden_size: dimension of hidden unit
@ -24,28 +44,39 @@ class CBHG(dg.Layer):
self.projection_size = projection_size
self.conv_list = []
k = math.sqrt(1 / projection_size)
self.conv_list.append(Conv1D(num_channels = projection_size,
num_filters = hidden_size,
filter_size = 1,
padding = int(np.floor(1/2)),
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k))))
self.conv_list.append(
Conv1D(
num_channels=projection_size,
num_filters=hidden_size,
filter_size=1,
padding=int(np.floor(1 / 2)),
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-k, high=k))))
k = math.sqrt(1 / hidden_size)
for i in range(2,K+1):
self.conv_list.append(Conv1D(num_channels = hidden_size,
num_filters = hidden_size,
filter_size = i,
padding = int(np.floor(i/2)),
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k))))
for i in range(2, K + 1):
self.conv_list.append(
Conv1D(
num_channels=hidden_size,
num_filters=hidden_size,
filter_size=i,
padding=int(np.floor(i / 2)),
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-k, high=k))))
for i, layer in enumerate(self.conv_list):
self.add_sublayer("conv_list_{}".format(i), layer)
self.batchnorm_list = []
for i in range(K):
self.batchnorm_list.append(dg.BatchNorm(hidden_size,
data_layout='NCHW'))
self.batchnorm_list.append(
dg.BatchNorm(
hidden_size, data_layout='NCHW'))
for i, layer in enumerate(self.batchnorm_list):
self.add_sublayer("batchnorm_list_{}".format(i), layer)
@ -53,68 +84,94 @@ class CBHG(dg.Layer):
conv_outdim = hidden_size * K
k = math.sqrt(1 / conv_outdim)
self.conv_projection_1 = Conv1D(num_channels = conv_outdim,
num_filters = hidden_size,
filter_size = 3,
padding = int(np.floor(3/2)),
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k)))
self.conv_projection_1 = Conv1D(
num_channels=conv_outdim,
num_filters=hidden_size,
filter_size=3,
padding=int(np.floor(3 / 2)),
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-k, high=k)))
k = math.sqrt(1 / hidden_size)
self.conv_projection_2 = Conv1D(num_channels = hidden_size,
num_filters = projection_size,
filter_size = 3,
padding = int(np.floor(3/2)),
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k)))
self.conv_projection_2 = Conv1D(
num_channels=hidden_size,
num_filters=projection_size,
filter_size=3,
padding=int(np.floor(3 / 2)),
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-k, high=k)))
self.batchnorm_proj_1 = dg.BatchNorm(hidden_size,
data_layout='NCHW')
self.batchnorm_proj_2 = dg.BatchNorm(projection_size,
data_layout='NCHW')
self.max_pool = Pool1D(pool_size = max_pool_kernel_size,
pool_type='max',
pool_stride=1,
pool_padding=1,
data_format = "NCT")
self.batchnorm_proj_1 = dg.BatchNorm(hidden_size, data_layout='NCHW')
self.batchnorm_proj_2 = dg.BatchNorm(
projection_size, data_layout='NCHW')
self.max_pool = Pool1D(
pool_size=max_pool_kernel_size,
pool_type='max',
pool_stride=1,
pool_padding=1,
data_format="NCT")
self.highway = Highwaynet(self.projection_size)
h_0 = np.zeros((batch_size, hidden_size // 2), dtype="float32")
h_0 = dg.to_variable(h_0)
k = math.sqrt(1 / hidden_size)
self.fc_forward1 = dg.Linear(hidden_size, hidden_size // 2 * 3,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k)))
self.fc_reverse1 = dg.Linear(hidden_size, hidden_size // 2 * 3,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k)))
self.gru_forward1 = DynamicGRU(size = self.hidden_size // 2,
is_reverse = False,
origin_mode = True,
h_0 = h_0)
self.gru_reverse1 = DynamicGRU(size = self.hidden_size // 2,
is_reverse=True,
origin_mode=True,
h_0 = h_0)
self.fc_forward1 = dg.Linear(
hidden_size,
hidden_size // 2 * 3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-k, high=k)))
self.fc_reverse1 = dg.Linear(
hidden_size,
hidden_size // 2 * 3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-k, high=k)))
self.gru_forward1 = DynamicGRU(
size=self.hidden_size // 2,
is_reverse=False,
origin_mode=True,
h_0=h_0)
self.gru_reverse1 = DynamicGRU(
size=self.hidden_size // 2,
is_reverse=True,
origin_mode=True,
h_0=h_0)
self.fc_forward2 = dg.Linear(hidden_size, hidden_size // 2 * 3,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k)))
self.fc_reverse2 = dg.Linear(hidden_size, hidden_size // 2 * 3,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k)))
self.gru_forward2 = DynamicGRU(size = self.hidden_size // 2,
is_reverse = False,
origin_mode = True,
h_0 = h_0)
self.gru_reverse2 = DynamicGRU(size = self.hidden_size // 2,
is_reverse=True,
origin_mode=True,
h_0 = h_0)
self.fc_forward2 = dg.Linear(
hidden_size,
hidden_size // 2 * 3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-k, high=k)))
self.fc_reverse2 = dg.Linear(
hidden_size,
hidden_size // 2 * 3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-k, high=k)))
self.gru_forward2 = DynamicGRU(
size=self.hidden_size // 2,
is_reverse=False,
origin_mode=True,
h_0=h_0)
self.gru_reverse2 = DynamicGRU(
size=self.hidden_size // 2,
is_reverse=True,
origin_mode=True,
h_0=h_0)
def _conv_fit_dim(self, x, filter_size=3):
if filter_size % 2 == 0:
return x[:,:,:-1]
return x[:, :, :-1]
else:
return x
@ -124,20 +181,23 @@ class CBHG(dg.Layer):
conv_list = []
conv_input = input_
for i, (conv, batchnorm) in enumerate(zip(self.conv_list, self.batchnorm_list)):
conv_input = self._conv_fit_dim(conv(conv_input), i+1)
for i, (conv, batchnorm
) in enumerate(zip(self.conv_list, self.batchnorm_list)):
conv_input = self._conv_fit_dim(conv(conv_input), i + 1)
conv_input = layers.relu(batchnorm(conv_input))
conv_list.append(conv_input)
conv_cat = layers.concat(conv_list, axis=1)
conv_pool = self.max_pool(conv_cat)[:,:,:-1]
conv_pool = self.max_pool(conv_cat)[:, :, :-1]
conv_proj = layers.relu(self.batchnorm_proj_1(self._conv_fit_dim(self.conv_projection_1(conv_pool))))
conv_proj = self.batchnorm_proj_2(self._conv_fit_dim(self.conv_projection_2(conv_proj))) + input_
conv_proj = layers.relu(
self.batchnorm_proj_1(
self._conv_fit_dim(self.conv_projection_1(conv_pool))))
conv_proj = self.batchnorm_proj_2(
self._conv_fit_dim(self.conv_projection_2(conv_proj))) + input_
# conv_proj.shape = [N, C, T]
highway = layers.transpose(conv_proj, [0,2,1])
highway = layers.transpose(conv_proj, [0, 2, 1])
highway = self.highway(highway)
# highway.shape = [N, T, C]
@ -151,9 +211,10 @@ class CBHG(dg.Layer):
out_forward = self.gru_forward2(fc_forward)
out_reverse = self.gru_reverse2(fc_reverse)
out = layers.concat([out_forward, out_reverse], axis=-1)
out = layers.transpose(out, [0,2,1])
out = layers.transpose(out, [0, 2, 1])
return out
class Highwaynet(dg.Layer):
def __init__(self, num_units, num_layers=4):
super(Highwaynet, self).__init__()
@ -164,14 +225,26 @@ class Highwaynet(dg.Layer):
self.linears = []
k = math.sqrt(1 / num_units)
for i in range(num_layers):
self.linears.append(dg.Linear(num_units, num_units,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k))))
self.gates.append(dg.Linear(num_units, num_units,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k))))
self.linears.append(
dg.Linear(
num_units,
num_units,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-k, high=k))))
self.gates.append(
dg.Linear(
num_units,
num_units,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-k, high=k))))
for i, (linear, gate) in enumerate(zip(self.linears,self.gates)):
for i, (linear, gate) in enumerate(zip(self.linears, self.gates)):
self.add_sublayer("linears_{}".format(i), linear)
self.add_sublayer("gates_{}".format(i), gate)
@ -183,12 +256,6 @@ class Highwaynet(dg.Layer):
t_ = fluid.layers.sigmoid(gate(out))
c = 1 - t_
out = h * t_ + out * c
out = h * t_ + out * c
return out
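
Each highway layer gates between a transformed path and the identity: out = relu(W_h x) * t + x * (1 - t), with t = sigmoid(W_t x). A minimal numpy rendering of one layer (weight shapes are left to the caller):

```python
import numpy as np

def highway_layer(x, w_h, w_t):
    h = np.maximum(0.0, x @ w_h)          # candidate transform
    t = 1.0 / (1.0 + np.exp(-(x @ w_t)))  # transform gate
    return h * t + x * (1.0 - t)          # gated mix of transform and identity
```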

View File

@ -1,3 +1,16 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
@ -7,48 +20,83 @@ from parakeet.modules.ffn import PositionwiseFeedForward
from parakeet.models.transformer_tts.prenet import PreNet
from parakeet.models.transformer_tts.post_convnet import PostConvNet
class Decoder(dg.Layer):
def __init__(self, num_hidden, config, num_head=4):
super(Decoder, self).__init__()
self.num_hidden = num_hidden
param = fluid.ParamAttr()
self.alpha = self.create_parameter(shape=(1,), attr=param, dtype='float32',
default_initializer = fluid.initializer.ConstantInitializer(value=1.0))
self.pos_inp = get_sinusoid_encoding_table(1024, self.num_hidden, padding_idx=0)
self.pos_emb = dg.Embedding(size=[1024, num_hidden],
padding_idx=0,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(self.pos_inp),
trainable=False))
self.decoder_prenet = PreNet(input_size = config['audio']['num_mels'],
hidden_size = num_hidden * 2,
output_size = num_hidden,
dropout_rate=0.2)
self.alpha = self.create_parameter(
shape=(1, ),
attr=param,
dtype='float32',
default_initializer=fluid.initializer.ConstantInitializer(
value=1.0))
self.pos_inp = get_sinusoid_encoding_table(
1024, self.num_hidden, padding_idx=0)
self.pos_emb = dg.Embedding(
size=[1024, num_hidden],
padding_idx=0,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(
self.pos_inp),
trainable=False))
self.decoder_prenet = PreNet(
input_size=config['audio']['num_mels'],
hidden_size=num_hidden * 2,
output_size=num_hidden,
dropout_rate=0.2)
k = math.sqrt(1 / num_hidden)
self.linear = dg.Linear(num_hidden, num_hidden,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k)))
self.linear = dg.Linear(
num_hidden,
num_hidden,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-k, high=k)))
self.selfattn_layers = [MultiheadAttention(num_hidden, num_hidden//num_head, num_hidden//num_head) for _ in range(3)]
self.selfattn_layers = [
MultiheadAttention(num_hidden, num_hidden // num_head,
num_hidden // num_head) for _ in range(3)
]
for i, layer in enumerate(self.selfattn_layers):
self.add_sublayer("self_attn_{}".format(i), layer)
self.attn_layers = [MultiheadAttention(num_hidden, num_hidden//num_head, num_hidden//num_head) for _ in range(3)]
self.attn_layers = [
MultiheadAttention(num_hidden, num_hidden // num_head,
num_hidden // num_head) for _ in range(3)
]
for i, layer in enumerate(self.attn_layers):
self.add_sublayer("attn_{}".format(i), layer)
self.ffns = [PositionwiseFeedForward(num_hidden, num_hidden*num_head, filter_size=1) for _ in range(3)]
self.ffns = [
PositionwiseFeedForward(
num_hidden, num_hidden * num_head, filter_size=1)
for _ in range(3)
]
for i, layer in enumerate(self.ffns):
self.add_sublayer("ffns_{}".format(i), layer)
self.mel_linear = dg.Linear(num_hidden, config['audio']['num_mels'] * config['audio']['outputs_per_step'],
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k)))
self.stop_linear = dg.Linear(num_hidden, 1,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k)))
self.mel_linear = dg.Linear(
num_hidden,
config['audio']['num_mels'] * config['audio']['outputs_per_step'],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-k, high=k)))
self.stop_linear = dg.Linear(
num_hidden,
1,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-k, high=k)))
self.postconvnet = PostConvNet(config['audio']['num_mels'], config['hidden_size'],
filter_size = 5, padding = 4, num_conv=5,
outputs_per_step=config['audio']['outputs_per_step'],
use_cudnn = True)
self.postconvnet = PostConvNet(
config['audio']['num_mels'],
config['hidden_size'],
filter_size=5,
padding=4,
num_conv=5,
outputs_per_step=config['audio']['outputs_per_step'],
use_cudnn=True)
def forward(self, key, value, query, c_mask, positional):
@ -56,15 +104,20 @@ class Decoder(dg.Layer):
if fluid.framework._dygraph_tracer()._train_mode:
m_mask = get_non_pad_mask(positional)
mask = get_attn_key_pad_mask((positional==0).astype(np.float32), query)
triu_tensor = dg.to_variable(get_triu_tensor(query.numpy(), query.numpy())).astype(np.float32)
mask = get_attn_key_pad_mask((positional == 0).astype(np.float32),
query)
triu_tensor = dg.to_variable(
get_triu_tensor(query.numpy(), query.numpy())).astype(
np.float32)
mask = mask + triu_tensor
mask = fluid.layers.cast(mask == 0, np.float32)
# (batch_size, decoder_len, encoder_len)
zero_mask = get_attn_key_pad_mask(layers.squeeze(c_mask,[-1]), query)
zero_mask = get_attn_key_pad_mask(
layers.squeeze(c_mask, [-1]), query)
else:
mask = get_triu_tensor(query.numpy(), query.numpy()).astype(np.float32)
mask = get_triu_tensor(query.numpy(),
query.numpy()).astype(np.float32)
mask = fluid.layers.cast(dg.to_variable(mask == 0), np.float32)
m_mask, zero_mask = None, None
@ -85,9 +138,12 @@ class Decoder(dg.Layer):
selfattn_list = list()
attn_list = list()
for selfattn, attn, ffn in zip(self.selfattn_layers, self.attn_layers, self.ffns):
query, attn_dec = selfattn(query, query, query, mask = mask, query_mask = m_mask)
query, attn_dot = attn(key, value, query, mask = zero_mask, query_mask = m_mask)
for selfattn, attn, ffn in zip(self.selfattn_layers, self.attn_layers,
self.ffns):
query, attn_dec = selfattn(
query, query, query, mask=mask, query_mask=m_mask)
query, attn_dot = attn(
key, value, query, mask=zero_mask, query_mask=m_mask)
query = ffn(query)
selfattn_list.append(attn_dec)
attn_list.append(attn_dot)
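
`get_triu_tensor` supplies the causal half of the decoder mask: ones strictly above the diagonal mark the future frames self-attention must not see. In isolation:

```python
import numpy as np

len_q = 4
causal = np.triu(np.ones([len_q, len_q]), 1)  # 1s strictly above the diagonal
print(causal)
# row i is 1 at columns j > i, i.e. positions that must be masked out
```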

View File

@ -1,3 +1,16 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
from parakeet.models.transformer_tts.utils import *
@ -5,25 +18,41 @@ from parakeet.modules.multihead_attention import MultiheadAttention
from parakeet.modules.ffn import PositionwiseFeedForward
from parakeet.models.transformer_tts.encoderprenet import EncoderPrenet
class Encoder(dg.Layer):
def __init__(self, embedding_size, num_hidden, num_head=4):
super(Encoder, self).__init__()
self.num_hidden = num_hidden
param = fluid.ParamAttr(initializer=fluid.initializer.Constant(value=1.0))
self.alpha = self.create_parameter(shape=(1, ), attr=param, dtype='float32')
self.pos_inp = get_sinusoid_encoding_table(1024, self.num_hidden, padding_idx=0)
self.pos_emb = dg.Embedding(size=[1024, num_hidden],
padding_idx=0,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(self.pos_inp),
trainable=False))
self.encoder_prenet = EncoderPrenet(embedding_size = embedding_size,
num_hidden = num_hidden,
use_cudnn=True)
self.layers = [MultiheadAttention(num_hidden, num_hidden//num_head, num_hidden//num_head) for _ in range(3)]
param = fluid.ParamAttr(initializer=fluid.initializer.Constant(
value=1.0))
self.alpha = self.create_parameter(
shape=(1, ), attr=param, dtype='float32')
self.pos_inp = get_sinusoid_encoding_table(
1024, self.num_hidden, padding_idx=0)
self.pos_emb = dg.Embedding(
size=[1024, num_hidden],
padding_idx=0,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(
self.pos_inp),
trainable=False))
self.encoder_prenet = EncoderPrenet(
embedding_size=embedding_size,
num_hidden=num_hidden,
use_cudnn=True)
self.layers = [
MultiheadAttention(num_hidden, num_hidden // num_head,
num_hidden // num_head) for _ in range(3)
]
for i, layer in enumerate(self.layers):
self.add_sublayer("self_attn_{}".format(i), layer)
self.ffns = [PositionwiseFeedForward(num_hidden, num_hidden*num_head, filter_size=1, use_cudnn = True) for _ in range(3)]
self.ffns = [
PositionwiseFeedForward(
num_hidden,
num_hidden * num_head,
filter_size=1,
use_cudnn=True) for _ in range(3)
]
for i, layer in enumerate(self.ffns):
self.add_sublayer("ffns_{}".format(i), layer)
@ -35,14 +64,12 @@ class Encoder(dg.Layer):
query_mask, mask = None, None
# Encoder pre_network
x = self.encoder_prenet(x) #(N,T,C)
x = self.encoder_prenet(x) #(N,T,C)
# Get positional encoding
positional = self.pos_emb(positional)
x = positional * self.alpha + x #(N, T, C)
x = positional * self.alpha + x #(N, T, C)
# Positional dropout
x = layers.dropout(x, 0.1)
@ -50,7 +77,7 @@ class Encoder(dg.Layer):
# Self attention encoder
attentions = list()
for layer, ffn in zip(self.layers, self.ffns):
x, attention = layer(x, x, x, mask = mask, query_mask = query_mask)
x, attention = layer(x, x, x, mask=mask, query_mask=query_mask)
x = ffn(x)
attentions.append(attention)

View File

@ -1,3 +1,16 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
from parakeet.g2p.text.symbols import symbols
import paddle.fluid.dygraph as dg
@ -13,47 +26,63 @@ class EncoderPrenet(dg.Layer):
self.embedding_size = embedding_size
self.num_hidden = num_hidden
self.use_cudnn = use_cudnn
self.embedding = dg.Embedding( size = [len(symbols), embedding_size],
padding_idx = None)
self.embedding = dg.Embedding(
size=[len(symbols), embedding_size], padding_idx=None)
self.conv_list = []
k = math.sqrt(1 / embedding_size)
self.conv_list.append(Conv1D(num_channels = embedding_size,
num_filters = num_hidden,
filter_size = 5,
padding = int(np.floor(5/2)),
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k)),
use_cudnn = use_cudnn))
self.conv_list.append(
Conv1D(
num_channels=embedding_size,
num_filters=num_hidden,
filter_size=5,
padding=int(np.floor(5 / 2)),
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-k, high=k)),
use_cudnn=use_cudnn))
k = math.sqrt(1 / num_hidden)
for _ in range(2):
self.conv_list.append(Conv1D(num_channels = num_hidden,
num_filters = num_hidden,
filter_size = 5,
padding = int(np.floor(5/2)),
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k)),
use_cudnn = use_cudnn))
self.conv_list.append(
Conv1D(
num_channels=num_hidden,
num_filters=num_hidden,
filter_size=5,
padding=int(np.floor(5 / 2)),
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-k, high=k)),
use_cudnn=use_cudnn))
for i, layer in enumerate(self.conv_list):
self.add_sublayer("conv_list_{}".format(i), layer)
self.batch_norm_list = [dg.BatchNorm(num_hidden,
data_layout='NCHW') for _ in range(3)]
self.batch_norm_list = [
dg.BatchNorm(
num_hidden, data_layout='NCHW') for _ in range(3)
]
for i, layer in enumerate(self.batch_norm_list):
self.add_sublayer("batch_norm_list_{}".format(i), layer)
k = math.sqrt(1 / num_hidden)
self.projection = dg.Linear(num_hidden, num_hidden,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k)))
self.projection = dg.Linear(
num_hidden,
num_hidden,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-k, high=k)))
def forward(self, x):
x = self.embedding(x) #(batch_size, seq_len, embedding_size)
x = layers.transpose(x,[0,2,1])
x = self.embedding(x) #(batch_size, seq_len, embedding_size)
x = layers.transpose(x, [0, 2, 1])
for batch_norm, conv in zip(self.batch_norm_list, self.conv_list):
x = layers.dropout(layers.relu(batch_norm(conv(x))), 0.2)
x = layers.transpose(x,[0,2,1]) #(N,T,C)
x = layers.transpose(x, [0, 2, 1]) #(N,T,C)
x = self.projection(x)
return x

View File

@ -1,9 +1,23 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from parakeet.modules.customized import Conv1D
class PostConvNet(dg.Layer):
def __init__(self,
n_mels=80,
@ -22,44 +36,61 @@ class PostConvNet(dg.Layer):
self.batchnorm_last = batchnorm_last
self.conv_list = []
k = math.sqrt(1 / (n_mels * outputs_per_step))
self.conv_list.append(Conv1D(num_channels = n_mels * outputs_per_step,
num_filters = num_hidden,
filter_size = filter_size,
padding = padding,
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k)),
use_cudnn = use_cudnn))
self.conv_list.append(
Conv1D(
num_channels=n_mels * outputs_per_step,
num_filters=num_hidden,
filter_size=filter_size,
padding=padding,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-k, high=k)),
use_cudnn=use_cudnn))
k = math.sqrt(1 / num_hidden)
for _ in range(1, num_conv-1):
self.conv_list.append(Conv1D(num_channels = num_hidden,
num_filters = num_hidden,
filter_size = filter_size,
padding = padding,
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k)),
use_cudnn = use_cudnn))
for _ in range(1, num_conv - 1):
self.conv_list.append(
Conv1D(
num_channels=num_hidden,
num_filters=num_hidden,
filter_size=filter_size,
padding=padding,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-k, high=k)),
use_cudnn=use_cudnn))
self.conv_list.append(Conv1D(num_channels = num_hidden,
num_filters = n_mels * outputs_per_step,
filter_size = filter_size,
padding = padding,
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k)),
use_cudnn = use_cudnn))
self.conv_list.append(
Conv1D(
num_channels=num_hidden,
num_filters=n_mels * outputs_per_step,
filter_size=filter_size,
padding=padding,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-k, high=k)),
use_cudnn=use_cudnn))
for i, layer in enumerate(self.conv_list):
self.add_sublayer("conv_list_{}".format(i), layer)
self.batch_norm_list = [dg.BatchNorm(num_hidden,
data_layout='NCHW') for _ in range(num_conv-1)]
self.batch_norm_list = [
dg.BatchNorm(
num_hidden, data_layout='NCHW') for _ in range(num_conv - 1)
]
if self.batchnorm_last:
self.batch_norm_list.append(dg.BatchNorm(n_mels * outputs_per_step,
data_layout='NCHW'))
self.batch_norm_list.append(
dg.BatchNorm(
n_mels * outputs_per_step, data_layout='NCHW'))
for i, layer in enumerate(self.batch_norm_list):
self.add_sublayer("batch_norm_list_{}".format(i), layer)
def forward(self, input):
"""
Post Conv Net.
@ -70,17 +101,18 @@ class PostConvNet(dg.Layer):
output (Variable), Shape(B, T, C), the result after postconvnet.
"""
input = layers.transpose(input, [0,2,1])
input = layers.transpose(input, [0, 2, 1])
len = input.shape[-1]
for i in range(self.num_conv-1):
for i in range(self.num_conv - 1):
batch_norm = self.batch_norm_list[i]
conv = self.conv_list[i]
input = layers.dropout(layers.tanh(batch_norm(conv(input)[:,:,:len])), self.dropout)
conv = self.conv_list[self.num_conv-1]
input = conv(input)[:,:,:len]
input = layers.dropout(
layers.tanh(batch_norm(conv(input)[:, :, :len])), self.dropout)
conv = self.conv_list[self.num_conv - 1]
input = conv(input)[:, :, :len]
if self.batchnorm_last:
batch_norm = self.batch_norm_list[self.num_conv-1]
batch_norm = self.batch_norm_list[self.num_conv - 1]
input = layers.dropout(batch_norm(input), self.dropout)
output = layers.transpose(input, [0,2,1])
output = layers.transpose(input, [0, 2, 1])
return output
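
The `[:, :, :len]` slices make the post-net causal: with filter_size 5 and padding 4 on both sides, each convolution emits T + 4 frames, and keeping only the first T means frame t depends on inputs t-4 .. t. The length arithmetic:

```python
# out_len = T + 2 * padding - filter_size + 1 = T + 2 * 4 - 5 + 1 = T + 4
T, padding, filter_size = 100, 4, 5
assert T + 2 * padding - filter_size + 1 == T + 4
```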

View File

@ -1,8 +1,22 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
import paddle.fluid.layers as layers
class PreNet(dg.Layer):
def __init__(self, input_size, hidden_size, output_size, dropout_rate=0.2):
"""
@ -17,13 +31,21 @@ class PreNet(dg.Layer):
self.dropout_rate = dropout_rate
k = math.sqrt(1 / input_size)
self.linear1 = dg.Linear(input_size, hidden_size,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k)))
self.linear1 = dg.Linear(
input_size,
hidden_size,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-k, high=k)))
k = math.sqrt(1 / hidden_size)
self.linear2 = dg.Linear(hidden_size, output_size,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k)))
self.linear2 = dg.Linear(
hidden_size,
output_size,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-k, high=k)))
def forward(self, x):
"""

View File

@ -1,8 +1,22 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
from parakeet.models.transformer_tts.encoder import Encoder
from parakeet.models.transformer_tts.decoder import Decoder
class TransformerTTS(dg.Layer):
def __init__(self, config):
super(TransformerTTS, self).__init__()
@ -14,13 +28,7 @@ class TransformerTTS(dg.Layer):
key, c_mask, attns_enc = self.encoder(characters, pos_text)
mel_output, postnet_output, attn_probs, stop_preds, attns_dec = self.decoder(key, key, mel_input, c_mask, pos_mel)
mel_output, postnet_output, attn_probs, stop_preds, attns_dec = self.decoder(
key, key, mel_input, c_mask, pos_mel)
return mel_output, postnet_output, attn_probs, stop_preds, attns_enc, attns_dec

View File

@ -1,3 +1,16 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import librosa
import os, copy
@ -6,14 +19,15 @@ import paddle.fluid.layers as layers
def get_positional_table(d_pos_vec, n_position=1024):
position_enc = np.array([
[pos / np.power(10000, 2*i/d_pos_vec) for i in range(d_pos_vec)]
if pos != 0 else np.zeros(d_pos_vec) for pos in range(n_position)])
position_enc = np.array(
[[pos / np.power(10000, 2 * i / d_pos_vec) for i in range(d_pos_vec)]
if pos != 0 else np.zeros(d_pos_vec) for pos in range(n_position)])
position_enc[1:, 0::2] = np.sin(position_enc[1:, 0::2]) # dim 2i
position_enc[1:, 1::2] = np.cos(position_enc[1:, 1::2]) # dim 2i+1
position_enc[1:, 0::2] = np.sin(position_enc[1:, 0::2]) # dim 2i
position_enc[1:, 1::2] = np.cos(position_enc[1:, 1::2]) # dim 2i+1
return position_enc
def get_sinusoid_encoding_table(n_position, d_hid, padding_idx=None):
''' Sinusoid position encoding table '''
@ -23,7 +37,8 @@ def get_sinusoid_encoding_table(n_position, d_hid, padding_idx=None):
def get_posi_angle_vec(position):
return [cal_angle(position, hid_j) for hid_j in range(d_hid)]
sinusoid_table = np.array([get_posi_angle_vec(pos_i) for pos_i in range(n_position)])
sinusoid_table = np.array(
[get_posi_angle_vec(pos_i) for pos_i in range(n_position)])
sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2]) # dim 2i
sinusoid_table[:, 1::2] = np.cos(sinusoid_table[:, 1::2]) # dim 2i+1
@ -34,8 +49,10 @@ def get_sinusoid_encoding_table(n_position, d_hid, padding_idx=None):
return sinusoid_table
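
`get_sinusoid_encoding_table` interleaves sines on even dims and cosines on odd dims of pos / 10000^(2*(i//2)/d_hid). A self-contained numpy equivalent, vectorized for clarity:

```python
import numpy as np

def sinusoid_table(n_position, d_hid):
    pos = np.arange(n_position)[:, None]                   # (P, 1)
    dim = np.arange(d_hid)[None, :]                        # (1, D)
    angle = pos / np.power(10000, 2 * (dim // 2) / d_hid)
    table = np.zeros((n_position, d_hid))
    table[:, 0::2] = np.sin(angle[:, 0::2])                # dim 2i
    table[:, 1::2] = np.cos(angle[:, 1::2])                # dim 2i+1
    return table
```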
def get_non_pad_mask(seq):
return layers.unsqueeze((seq != 0).astype(np.float32),[-1])
return layers.unsqueeze((seq != 0).astype(np.float32), [-1])
def get_attn_key_pad_mask(seq_k, seq_q):
''' For masking out the padding part of key sequence. '''
@ -43,32 +60,37 @@ def get_attn_key_pad_mask(seq_k, seq_q):
# Expand to fit the shape of key query attention matrix.
len_q = seq_q.shape[1]
padding_mask = (seq_k != 0).astype(np.float32)
padding_mask = layers.expand(layers.unsqueeze(padding_mask,[1]), [1, len_q, 1])
padding_mask = layers.expand(
layers.unsqueeze(padding_mask, [1]), [1, len_q, 1])
return padding_mask
def get_triu_tensor(seq_k, seq_q):
''' For make a triu tensor '''
len_k = seq_k.shape[1]
len_q = seq_q.shape[1]
batch_size = seq_k.shape[0]
triu_tensor = np.triu(np.ones([len_k, len_q]), 1)
triu_tensor = np.repeat(np.expand_dims(triu_tensor, axis=0) ,batch_size, axis=0)
triu_tensor = np.repeat(
np.expand_dims(
triu_tensor, axis=0), batch_size, axis=0)
return triu_tensor
def guided_attention(N, T, g=0.2):
'''Guided attention. Refer to page 3 of the paper.'''
W = np.zeros((N, T), dtype=np.float32)
for n_pos in range(W.shape[0]):
for t_pos in range(W.shape[1]):
W[n_pos, t_pos] = 1 - np.exp(-(t_pos / float(T) - n_pos / float(N)) ** 2 / (2 * g * g))
W[n_pos, t_pos] = 1 - np.exp(-(t_pos / float(T) - n_pos / float(N))
**2 / (2 * g * g))
return W
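
The guided-attention weight is W[n, t] = 1 - exp(-(t/T - n/N)^2 / (2 g^2)): zero on the diagonal and approaching one far from it. For example, with the default g = 0.2 a diagonal offset of 0.4 gives roughly 0.865:

```python
import numpy as np

g = 0.2
print(1 - np.exp(-0.4 ** 2 / (2 * g * g)))  # 1 - exp(-2) ~ 0.8647
```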
def cross_entropy(input, label, position_weight=1.0, epsilon=1e-30):
output = -1 * label * layers.log(input + epsilon) - (1-label) * layers.log(1 - input + epsilon)
output = -1 * label * layers.log(input + epsilon) - (
1 - label) * layers.log(1 - input + epsilon)
output = output * (label * (position_weight - 1) + 1)
return layers.reduce_sum(output, dim=[0, 1])
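For reference, a standalone NumPy sketch of the guided-attention mask these utilities build (a re-implementation for illustration, not the module's API): W[n, t] stays near 0 along the text/frame diagonal and approaches 1 away from it, which is what penalizes non-monotonic alignments.

```python
import numpy as np

def guided_attention_demo(N, T, g=0.2):
    # vectorized form of the double loop above:
    # W[n, t] = 1 - exp(-((t/T - n/N)^2) / (2 g^2))
    n = np.arange(N)[:, None] / float(N)
    t = np.arange(T)[None, :] / float(T)
    return (1.0 - np.exp(-(t - n) ** 2 / (2 * g * g))).astype(np.float32)

W = guided_attention_demo(N=5, T=8)
print(W.shape)            # (5, 8)
print(W[0, 0], W[0, -1])  # ~0.0 on the diagonal, close to 1 far off it
```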

View File

@@ -1,27 +1,44 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
from parakeet.modules.customized import Conv1D
from parakeet.models.transformer_tts.utils import *
from parakeet.models.transformer_tts.cbhg import CBHG
class Vocoder(dg.Layer):
"""
CBHG Network (mel -> linear)
"""
def __init__(self, config, batch_size):
super(Vocoder, self).__init__()
self.pre_proj = Conv1D(num_channels = config['audio']['num_mels'],
num_filters = config['hidden_size'],
filter_size=1)
self.pre_proj = Conv1D(
num_channels=config['audio']['num_mels'],
num_filters=config['hidden_size'],
filter_size=1)
self.cbhg = CBHG(config['hidden_size'], batch_size)
self.post_proj = Conv1D(num_channels = config['hidden_size'],
num_filters = (config['audio']['n_fft'] // 2) + 1,
filter_size=1)
self.post_proj = Conv1D(
num_channels=config['hidden_size'],
num_filters=(config['audio']['n_fft'] // 2) + 1,
filter_size=1)
def forward(self, mel):
mel = layers.transpose(mel, [0,2,1])
mel = layers.transpose(mel, [0, 2, 1])
mel = self.pre_proj(mel)
mel = self.cbhg(mel)
mag_pred = self.post_proj(mel)
mag_pred = layers.transpose(mag_pred, [0,2,1])
mag_pred = layers.transpose(mag_pred, [0, 2, 1])
return mag_pred
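As a shape sanity check for this mel-to-linear path, here is a NumPy sketch that treats both 1x1 convolutions as per-frame matrix multiplies (the CBHG stage is elided, and all sizes are illustrative assumptions):

```python
import numpy as np

batch, frames, n_mels, hidden, n_fft = 2, 100, 80, 256, 1024
mel = np.random.rand(batch, frames, n_mels).astype(np.float32)

x = mel.transpose(0, 2, 1)                        # [B, T, n_mels] -> [B, n_mels, T]
w_pre = np.random.randn(hidden, n_mels)           # 1x1 conv: n_mels -> hidden
x = np.einsum('oc,bct->bot', w_pre, x)
# ... the CBHG stack would run here at `hidden` channels ...
w_post = np.random.randn(n_fft // 2 + 1, hidden)  # 1x1 conv: hidden -> linear bins
mag = np.einsum('oc,bct->bot', w_post, x).transpose(0, 2, 1)
print(mag.shape)                                  # (2, 100, 513)
```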

View File

@@ -1 +1,15 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from parakeet.models.waveflow.waveflow import WaveFlow

View File

@@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random
import librosa

View File

@@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import itertools
import os
import time

View File

@@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import itertools
import numpy as np
import paddle.fluid.dygraph as dg

View File

@@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random
import librosa
@@ -18,8 +32,8 @@ class Dataset(ljspeech.LJSpeech):
self.fft_window_shift = config.fft_window_shift
# Calculate context frames.
frames_per_second = config.sample_rate // self.fft_window_shift
train_clip_frames = int(np.ceil(
config.train_clip_second * frames_per_second))
train_clip_frames = int(
np.ceil(config.train_clip_second * frames_per_second))
context_frames = config.context_size // self.fft_window_shift
self.num_frames = train_clip_frames + context_frames
@@ -53,12 +67,16 @@ class Dataset(ljspeech.LJSpeech):
# Compute mel-spectrogram.
# Turn center to False to prevent internal padding.
spectrogram = librosa.core.stft(
audio, hop_length=fft_window_shift,
win_length=fft_window_size, n_fft=fft_size, center=False)
audio,
hop_length=fft_window_shift,
win_length=fft_window_size,
n_fft=fft_size,
center=False)
spectrogram_magnitude = np.abs(spectrogram)
# Compute mel-spectrograms.
mel_filter_bank = librosa.filters.mel(sr=sr, n_fft=fft_size,
mel_filter_bank = librosa.filters.mel(sr=sr,
n_fft=fft_size,
n_mels=config.mel_bands)
mel_spectrogram = np.dot(mel_filter_bank, spectrogram_magnitude)
mel_spectrogram = mel_spectrogram.T
@@ -70,7 +88,7 @@ class Dataset(ljspeech.LJSpeech):
mel_spectrogram = np.clip((mel_spectrogram + 100) / 100, 0, 1)
# Extract the center of audio that corresponds to mel spectrograms.
audio = audio[fft_padding : -fft_padding]
audio = audio[fft_padding:-fft_padding]
assert mel_spectrogram.shape[0] * fft_window_shift == audio.size
return audio, mel_spectrogram
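A minimal, self-contained sketch of this mel pipeline on synthetic audio (the parameter values and the log floor are illustrative assumptions, not the repo's config):

```python
import numpy as np
import librosa

sr, n_fft, win, hop, n_mels = 22050, 2048, 1024, 256, 80  # illustrative values
audio = np.random.uniform(-1, 1, sr).astype(np.float32)   # one second of noise

spec = librosa.stft(audio, n_fft=n_fft, win_length=win,
                    hop_length=hop, center=False)          # no internal padding
magnitude = np.abs(spec)
mel_basis = librosa.filters.mel(sr=sr, n_fft=n_fft, n_mels=n_mels)
mel = np.dot(mel_basis, magnitude).T                       # [frames, n_mels]
mel = 20 * np.log10(np.maximum(mel, 1e-5))                 # to dB (floor assumed)
mel = np.clip((mel + 100) / 100, 0, 1)                     # same 0-1 normalization
print(mel.shape)
```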
@@ -101,7 +119,7 @@ class Subset(dataset.Dataset):
audio_start = frame_start * fft_window_shift
audio_end = frame_end * fft_window_shift
audio = audio[audio_start : audio_end]
audio = audio[audio_start:audio_end]
return audio, mel, audio_start
@@ -141,14 +159,14 @@ class LJSpeech:
sampler = DistributedSampler(len(trainset), nranks, rank)
total_bs = config.batch_size
assert total_bs % nranks == 0
train_sampler = BatchSampler(sampler, total_bs // nranks,
drop_last=True)
train_sampler = BatchSampler(
sampler, total_bs // nranks, drop_last=True)
trainloader = DataCargo(trainset, batch_sampler=train_sampler)
trainreader = fluid.io.PyReader(capacity=50, return_list=True)
trainreader.decorate_batch_generator(trainloader, place)
self.trainloader = (data for _ in iter(int, 1)
for data in trainreader())
for data in trainreader())
# Valid dataset.
validset = Subset(ds, valid_indices, valid=True)
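The `iter(int, 1)` expression above is the standard trick for an infinite loop inside a generator expression: `iter(callable, sentinel)` keeps calling `int()` (which returns 0 and never the sentinel 1), so the finite reader is restarted forever. A toy demonstration:

```python
import itertools

def one_epoch():                 # stands in for trainreader(): one finite pass
    return iter([1, 2, 3])

stream = (x for _ in iter(int, 1) for x in one_epoch())
print(list(itertools.islice(stream, 8)))   # [1, 2, 3, 1, 2, 3, 1, 2]
```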

View File

@@ -1,3 +1,16 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Utility module for restarting training when using SLURM.
"""
@@ -45,8 +58,8 @@ def parse_time(text):
try:
return parse_hours(hours) * 3600 + int(minutes) * 60 + int(seconds)
except ValueError as e:
raise ValueError("Error parsing time {}. Got error {}.".format(
text, str(e)))
raise ValueError("Error parsing time {}. Got error {}.".format(text,
str(e)))
def restart_command():
@@ -76,8 +89,10 @@ def restart_command():
gres, partition = info.get("Gres"), info.get("Partition")
stderr, stdout = info.get("StdErr"), info.get("StdOut")
job_name = info.get("JobName")
command = ["sbatch", "--job-name={}".format(job_name),
"--ntasks={}".format(num_tasks)]
command = [
"sbatch", "--job-name={}".format(job_name),
"--ntasks={}".format(num_tasks)
]
if partition:
command.extend(["--partition", partition])
@@ -98,12 +113,13 @@ def restart_command():
dist_setting = ['-m', 'paddle.distributed.launch']
wrap_cmd = ["srun", python, '-u'] + dist_setting + sys.argv
command.append(
"--wrap={}".format(" ".join(shlex.quote(arg) for arg in wrap_cmd)))
command.append("--wrap={}".format(" ".join(
shlex.quote(arg) for arg in wrap_cmd)))
time_limit_string = info["TimeLimit"]
if time_limit_string.lower() == "unlimited":
print("UNLIMITED detected: restart OFF, infinite learning ON.",
flush=True)
print(
"UNLIMITED detected: restart OFF, infinite learning ON.",
flush=True)
return command, None
time_limit = parse_time(time_limit_string)
runtime = parse_time(info["RunTime"])
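A standalone sketch of parsing SLURM-style time strings such as `12:30:00` or `1-02:00:00` (day-hour form); this mirrors the intent of `parse_time`/`parse_hours` above rather than their exact code:

```python
def parse_slurm_time(text):
    hours, minutes, seconds = text.split(":")
    if "-" in hours:                          # "D-HH" form
        days, hours = hours.split("-")
        total_hours = int(days) * 24 + int(hours)
    else:
        total_hours = int(hours)
    return total_hours * 3600 + int(minutes) * 60 + int(seconds)

assert parse_slurm_time("00:01:30") == 90
assert parse_slurm_time("1-00:00:10") == 86410
```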

View File

@@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import random
from pprint import pprint
@@ -12,25 +26,42 @@ from wavenet import WaveNet
def add_options_to_parser(parser):
parser.add_argument('--model', type=str, default='wavenet',
parser.add_argument(
'--model',
type=str,
default='wavenet',
help="general name of the model")
parser.add_argument('--name', type=str,
help="specific name of the training model")
parser.add_argument('--root', type=str,
help="root path of the LJSpeech dataset")
parser.add_argument(
'--name', type=str, help="specific name of the training model")
parser.add_argument(
'--root', type=str, help="root path of the LJSpeech dataset")
parser.add_argument('--use_gpu', type=bool, default=True,
parser.add_argument(
'--use_gpu',
type=bool,
default=True,
help="option to use gpu training")
parser.add_argument('--iteration', type=int, default=None,
parser.add_argument(
'--iteration',
type=int,
default=None,
help=("which iteration of checkpoint to load, "
"default to load the latest checkpoint"))
parser.add_argument('--checkpoint', type=str, default=None,
parser.add_argument(
'--checkpoint',
type=str,
default=None,
help="path of the checkpoint to load")
parser.add_argument('--output', type=str, default="./syn_audios",
parser.add_argument(
'--output',
type=str,
default="./syn_audios",
help="path to write synthesized audio files")
parser.add_argument('--sample', type=int,
parser.add_argument(
'--sample',
type=int,
help="which of the valid samples to synthesize audio")

View File

@@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import random
import subprocess
@@ -18,24 +32,42 @@ MAXIMUM_SAVE_TIME = 10 * 60
def add_options_to_parser(parser):
parser.add_argument('--model', type=str, default='wavenet',
parser.add_argument(
'--model',
type=str,
default='wavenet',
help="general name of the model")
parser.add_argument('--name', type=str,
help="specific name of the training model")
parser.add_argument('--root', type=str,
help="root path of the LJSpeech dataset")
parser.add_argument(
'--name', type=str, help="specific name of the training model")
parser.add_argument(
'--root', type=str, help="root path of the LJSpeech dataset")
parser.add_argument('--parallel', type=bool, default=True,
parser.add_argument(
'--parallel',
type=bool,
default=True,
help="option to use data parallel training")
parser.add_argument('--use_gpu', type=bool, default=True,
parser.add_argument(
'--use_gpu',
type=bool,
default=True,
help="option to use gpu training")
parser.add_argument('--iteration', type=int, default=None,
parser.add_argument(
'--iteration',
type=int,
default=None,
help=("which iteration of checkpoint to load, "
"default to load the latest checkpoint"))
parser.add_argument('--checkpoint', type=str, default=None,
parser.add_argument(
'--checkpoint',
type=str,
default=None,
help="path of the checkpoint to load")
parser.add_argument('--slurm', type=bool, default=False,
parser.add_argument(
'--slurm',
type=bool,
default=False,
help="whether you are using slurm to submit training jobs")
@@ -104,8 +136,8 @@ def train(config):
# Check whether reaching the time limit.
if config.slurm:
done = (death_time is not None and death_time - time.time() <
MAXIMUM_SAVE_TIME)
done = (death_time is not None and
death_time - time.time() < MAXIMUM_SAVE_TIME)
if rank == 0 and done:
print("Saving progress before exiting.")
@@ -127,8 +159,8 @@ def train(config):
if __name__ == "__main__":
# Create parser.
parser = jsonargparse.ArgumentParser(description="Train WaveNet model",
formatter_class='default_argparse')
parser = jsonargparse.ArgumentParser(
description="Train WaveNet model", formatter_class='default_argparse')
add_options_to_parser(parser)
utils.add_config_options_to_parser(parser)
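The guard reshaped in the `--slurm` hunk reduces to one rule: stop once the remaining wall-clock budget is smaller than the time reserved for a final checkpoint save. A minimal sketch:

```python
import time

MAXIMUM_SAVE_TIME = 10 * 60          # seconds reserved for one last save

def should_stop(death_time):
    # death_time: absolute epoch seconds at which SLURM will kill the job
    return death_time is not None and death_time - time.time() < MAXIMUM_SAVE_TIME

print(should_stop(None))                  # False: no known limit
print(should_stop(time.time() + 5 * 60))  # True: inside the save window
```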

View File

@@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import itertools
import os
import time
@@ -8,57 +22,82 @@ import paddle.fluid.dygraph as dg
def add_config_options_to_parser(parser):
parser.add_argument('--valid_size', type=int,
help="size of the valid dataset")
parser.add_argument('--train_clip_second', type=float,
parser.add_argument(
'--valid_size', type=int, help="size of the valid dataset")
parser.add_argument(
'--train_clip_second',
type=float,
help="the length of audio clip for training")
parser.add_argument('--sample_rate', type=int,
help="sampling rate of audio data file")
parser.add_argument('--fft_window_shift', type=int,
parser.add_argument(
'--sample_rate', type=int, help="sampling rate of audio data file")
parser.add_argument(
'--fft_window_shift',
type=int,
help="the shift of fft window for each frame")
parser.add_argument('--fft_window_size', type=int,
parser.add_argument(
'--fft_window_size',
type=int,
help="the size of fft window for each frame")
parser.add_argument('--fft_size', type=int,
help="the size of fft filter on each frame")
parser.add_argument('--mel_bands', type=int,
parser.add_argument(
'--fft_size', type=int, help="the size of fft filter on each frame")
parser.add_argument(
'--mel_bands',
type=int,
help="the number of mel bands when calculating mel spectrograms")
parser.add_argument('--seed', type=int,
help="seed of random initialization for the model")
parser.add_argument('--batch_size', type=int,
help="batch size for training")
parser.add_argument('--test_every', type=int,
help="test interval during training")
parser.add_argument('--save_every', type=int,
parser.add_argument(
'--seed', type=int, help="seed of random initialization for the model")
parser.add_argument(
'--batch_size', type=int, help="batch size for training")
parser.add_argument(
'--test_every', type=int, help="test interval during training")
parser.add_argument(
'--save_every',
type=int,
help="checkpointing interval during training")
parser.add_argument('--max_iterations', type=int,
help="maximum training iterations")
parser.add_argument(
'--max_iterations', type=int, help="maximum training iterations")
parser.add_argument('--layers', type=int,
help="number of dilated convolution layers")
parser.add_argument('--kernel_width', type=int,
help="dilated convolution kernel width")
parser.add_argument('--dilation_block', type=list,
help="dilated convolution kernel width")
parser.add_argument(
'--layers', type=int, help="number of dilated convolution layers")
parser.add_argument(
'--kernel_width', type=int, help="dilated convolution kernel width")
parser.add_argument(
'--dilation_block', type=list, help="dilated convolution kernel width")
parser.add_argument('--residual_channels', type=int)
parser.add_argument('--skip_channels', type=int)
parser.add_argument('--loss_type', type=str,
help="mix-gaussian-pdf or softmax")
parser.add_argument('--num_channels', type=int, default=None,
parser.add_argument(
'--loss_type', type=str, help="mix-gaussian-pdf or softmax")
parser.add_argument(
'--num_channels',
type=int,
default=None,
help="number of channels for softmax output")
parser.add_argument('--num_mixtures', type=int, default=None,
parser.add_argument(
'--num_mixtures',
type=int,
default=None,
help="number of gaussian mixtures for gaussian output")
parser.add_argument('--log_scale_min', type=float, default=None,
parser.add_argument(
'--log_scale_min',
type=float,
default=None,
help="minimum clip value of log variance of gaussian output")
parser.add_argument('--conditioner.filter_sizes', type=list,
parser.add_argument(
'--conditioner.filter_sizes',
type=list,
help="conv2d tranpose op filter sizes for building conditioner")
parser.add_argument('--conditioner.upsample_factors', type=list,
parser.add_argument(
'--conditioner.upsample_factors',
type=list,
help="list of upsample factors for building conditioner")
parser.add_argument('--learning_rate', type=float)
parser.add_argument('--gradient_max_norm', type=float)
parser.add_argument('--anneal.every', type=int,
parser.add_argument(
'--anneal.every',
type=int,
help="step interval for annealing learning rate")
parser.add_argument('--anneal.rate', type=float)
@@ -113,8 +152,12 @@ def save_latest_checkpoint(checkpoint_dir, iteration):
handle.write("model_checkpoint_path: step-{}".format(iteration))
def load_parameters(checkpoint_dir, rank, model, optimizer=None,
iteration=None, file_path=None):
def load_parameters(checkpoint_dir,
rank,
model,
optimizer=None,
iteration=None,
file_path=None):
if file_path is None:
if iteration is None:
iteration = load_latest_checkpoint(checkpoint_dir, rank)
@@ -128,7 +171,7 @@ def load_parameters(checkpoint_dir, rank, model, optimizer=None,
if optimizer and optimizer_dict:
optimizer.set_dict(optimizer_dict)
print("[checkpoint] Rank {}: loaded optimizer state from {}".format(
rank, file_path))
rank, file_path))
def save_latest_parameters(checkpoint_dir, iteration, model, optimizer=None):
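The checkpoint helpers above revolve around a one-line pointer file recording the latest iteration. A sketch of both directions (the pointer filename here is an assumption; the diff only shows the line being written):

```python
import os, tempfile

def write_pointer(checkpoint_dir, iteration):
    # same line format as save_latest_checkpoint above; filename is assumed
    with open(os.path.join(checkpoint_dir, "checkpoint"), "w") as f:
        f.write("model_checkpoint_path: step-{}".format(iteration))

def read_pointer(checkpoint_dir):
    with open(os.path.join(checkpoint_dir, "checkpoint")) as f:
        return int(f.read().strip().split("step-")[-1])

d = tempfile.mkdtemp()
write_pointer(d, 5000)
print(read_pointer(d))   # 5000
```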

View File

@@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import itertools
import os
import time
@@ -13,8 +27,13 @@ from wavenet_modules import WaveNetModule
class WaveNet():
def __init__(self, config, checkpoint_dir, parallel=False, rank=0,
nranks=1, tb_logger=None):
def __init__(self,
config,
checkpoint_dir,
parallel=False,
rank=0,
nranks=1,
tb_logger=None):
# Process config to calculate the context size
dilations = list(
itertools.islice(
@@ -45,9 +64,9 @@ class WaveNet():
if training:
# Create Learning rate scheduler.
lr_scheduler = dg.ExponentialDecay(
learning_rate = config.learning_rate,
decay_steps = config.anneal.every,
decay_rate = config.anneal.rate,
learning_rate=config.learning_rate,
decay_steps=config.anneal.every,
decay_rate=config.anneal.rate,
staircase=True)
optimizer = fluid.optimizer.AdamOptimizer(
@@ -57,10 +76,13 @@ class WaveNet():
config.gradient_max_norm)
# Load parameters.
utils.load_parameters(self.checkpoint_dir, self.rank,
wavenet, optimizer,
iteration=config.iteration,
file_path=config.checkpoint)
utils.load_parameters(
self.checkpoint_dir,
self.rank,
wavenet,
optimizer,
iteration=config.iteration,
file_path=config.checkpoint)
print("Rank {}: checkpoint loaded.".format(self.rank))
# Data parallelism.
@@ -74,9 +96,12 @@ class WaveNet():
else:
# Load parameters.
utils.load_parameters(self.checkpoint_dir, self.rank, wavenet,
iteration=config.iteration,
file_path=config.checkpoint)
utils.load_parameters(
self.checkpoint_dir,
self.rank,
wavenet,
iteration=config.iteration,
file_path=config.checkpoint)
print("Rank {}: checkpoint loaded.".format(self.rank))
self.wavenet = wavenet
@@ -104,7 +129,9 @@ class WaveNet():
else:
current_lr = self.optimizer._learning_rate
self.optimizer.minimize(loss, grad_clip=self.clipper,
self.optimizer.minimize(
loss,
grad_clip=self.clipper,
parameter_list=self.wavenet.parameters())
self.wavenet.clear_gradients()
@@ -143,10 +170,16 @@ class WaveNet():
tb = self.tb_logger
tb.add_scalar("Valid-Avg-Loss", loss_val, iteration)
tb.add_audio("Teacher-Forced-Audio-0", sample_audios[0].numpy(),
iteration, sample_rate=self.config.sample_rate)
tb.add_audio("Teacher-Forced-Audio-1", sample_audios[1].numpy(),
iteration, sample_rate=self.config.sample_rate)
tb.add_audio(
"Teacher-Forced-Audio-0",
sample_audios[0].numpy(),
iteration,
sample_rate=self.config.sample_rate)
tb.add_audio(
"Teacher-Forced-Audio-1",
sample_audios[1].numpy(),
iteration,
sample_rate=self.config.sample_rate)
@dg.no_grad
def infer(self, iteration):
@@ -165,10 +198,9 @@ class WaveNet():
start_time = time.time()
syn_audio = self.wavenet.synthesize(mels_list[sample])
syn_time = time.time() - start_time
print("audio shape {}, synthesis time {}".format(
syn_audio.shape, syn_time))
librosa.output.write_wav(filename, syn_audio,
sr=config.sample_rate)
print("audio shape {}, synthesis time {}".format(syn_audio.shape,
syn_time))
librosa.output.write_wav(filename, syn_audio, sr=config.sample_rate)
def save(self, iteration):
utils.save_latest_parameters(self.checkpoint_dir, iteration,

View File

@@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import itertools
import numpy as np
@@ -20,7 +34,7 @@ def extract_slices(x, audio_starts, audio_length, rank):
start = audio_starts.numpy()[i]
end = start + audio_length
slice = fluid.layers.slice(
x, axes=[0, 1], starts=[i, start], ends=[i+1, end])
x, axes=[0, 1], starts=[i, start], ends=[i + 1, end])
slices.append(fluid.layers.squeeze(slice, [0]))
x = fluid.layers.stack(slices, axis=0)
@@ -82,15 +96,13 @@ class WaveNetModule(dg.Layer):
embed_dim=config.residual_channels,
std=0.1)
elif config.loss_type == "mix-gaussian-pdf":
self.embedding_fc = modules.FC(
self.full_name(),
in_features=1,
size=config.residual_channels,
num_flatten_dims=2,
relu=False)
self.embedding_fc = modules.FC(self.full_name(),
in_features=1,
size=config.residual_channels,
num_flatten_dims=2,
relu=False)
else:
raise ValueError(
"loss_type {} is unsupported!".format(loss_type))
raise ValueError("loss_type {} is unsupported!".format(loss_type))
self.dilated_causal_convs = []
for dilation in self.dilations:
@@ -102,56 +114,49 @@ class WaveNetModule(dg.Layer):
num_filters=config.residual_channels,
filter_size=config.kernel_width,
dilation=dilation,
causal=True
)
)
causal=True))
for i, layer in enumerate(self.dilated_causal_convs):
self.add_sublayer("dilated_causal_conv_{}".format(i), layer)
self.fc1 = modules.FC(
self.full_name(),
in_features=config.residual_channels,
size=config.skip_channels,
num_flatten_dims=2,
relu=True,
act="relu")
self.fc1 = modules.FC(self.full_name(),
in_features=config.residual_channels,
size=config.skip_channels,
num_flatten_dims=2,
relu=True,
act="relu")
self.fc2 = modules.FC(
self.full_name(),
in_features=config.skip_channels,
size=config.skip_channels,
num_flatten_dims=2,
relu=True,
act="relu")
self.fc2 = modules.FC(self.full_name(),
in_features=config.skip_channels,
size=config.skip_channels,
num_flatten_dims=2,
relu=True,
act="relu")
if config.loss_type == "softmax":
self.fc3 = modules.FC(
self.full_name(),
in_features=config.skip_channels,
size=config.num_channels,
num_flatten_dims=2,
relu=False)
self.fc3 = modules.FC(self.full_name(),
in_features=config.skip_channels,
size=config.num_channels,
num_flatten_dims=2,
relu=False)
elif config.loss_type == "mix-gaussian-pdf":
self.fc3 = modules.FC(
self.full_name(),
in_features=config.skip_channels,
size=3 * config.num_mixtures,
num_flatten_dims=2,
relu=False)
self.fc3 = modules.FC(self.full_name(),
in_features=config.skip_channels,
size=3 * config.num_mixtures,
num_flatten_dims=2,
relu=False)
else:
raise ValueError(
"loss_type {} is unsupported!".format(loss_type))
raise ValueError("loss_type {} is unsupported!".format(loss_type))
def sample_softmax(self, mix_parameters):
batch, length, hidden = mix_parameters.shape
mix_param_2d = fluid.layers.reshape(mix_parameters,
[batch * length, hidden])
[batch * length, hidden])
mix_param_2d = fluid.layers.softmax(mix_param_2d, axis=-1)
# quantized: [batch * length]
quantized = fluid.layers.cast(fluid.layers.sampling_id(mix_param_2d),
dtype="float32")
quantized = fluid.layers.cast(
fluid.layers.sampling_id(mix_param_2d), dtype="float32")
samples = (quantized + 0.5) * (2.0 / self.config.num_channels) - 1.0
# samples: [batch * length]
@@ -162,13 +167,13 @@ class WaveNetModule(dg.Layer):
# to [bs * len, 3 * num_mixtures].
batch, length, hidden = mix_parameters.shape
mix_param_2d = fluid.layers.reshape(mix_parameters,
[batch * length, hidden])
[batch * length, hidden])
K = hidden // 3
# Unpack the parameters of the mixture of gaussian.
logits_pi = mix_param_2d[:, 0 : K]
mu = mix_param_2d[:, K : 2*K]
log_s = mix_param_2d[:, 2*K : 3*K]
logits_pi = mix_param_2d[:, 0:K]
mu = mix_param_2d[:, K:2 * K]
log_s = mix_param_2d[:, 2 * K:3 * K]
s = fluid.layers.exp(log_s)
pi = fluid.layers.softmax(logits_pi, axis=-1)
@@ -220,8 +225,9 @@ class WaveNetModule(dg.Layer):
# Calculate gaussian loss.
targets = fluid.layers.unsqueeze(targets, -1)
targets = fluid.layers.expand(targets, [1, 1, self.config.num_mixtures])
x_std = inv_s * (targets - mu)
targets = fluid.layers.expand(targets,
[1, 1, self.config.num_mixtures])
x_std = inv_s * (targets - mu)
exponent = fluid.layers.exp(-0.5 * x_std * x_std)
pdf_x = 1.0 / np.sqrt(2.0 * np.pi) * inv_s * exponent
pdf_x = pi * pdf_x
@@ -239,8 +245,8 @@ class WaveNetModule(dg.Layer):
# Slice conditioners.
audio_length = audios.shape[1]
conditioner = extract_slices(full_conditioner,
audio_starts, audio_length, self.rank)
conditioner = extract_slices(full_conditioner, audio_starts,
audio_length, self.rank)
# input_audio, target_audio: [bs, len]
input_audios = audios[:, :-1]
@@ -263,15 +269,16 @@ class WaveNetModule(dg.Layer):
layer_input = self.embedding_fc(
fluid.layers.unsqueeze(input_audios, 2))
else:
raise ValueError(
"loss_type {} is unsupported!".format(loss_type))
raise ValueError("loss_type {} is unsupported!".format(loss_type))
# layer_input: [bs, res_channel, 1, len]
layer_input = fluid.layers.unsqueeze(
fluid.layers.transpose(layer_input, perm=[0, 2, 1]), 2)
fluid.layers.transpose(
layer_input, perm=[0, 2, 1]), 2)
# conditioner: [bs, mel_bands, 1, len]
conditioner = fluid.layers.unsqueeze(
fluid.layers.transpose(conditioner, perm=[0, 2, 1]), 2)
fluid.layers.transpose(
conditioner, perm=[0, 2, 1]), 2)
skip = None
for i, layer in enumerate(self.dilated_causal_convs):
@@ -292,17 +299,16 @@ class WaveNetModule(dg.Layer):
elif loss_type == "mix-gaussian-pdf":
sample_audios = self.sample_mix_gaussian(mix_parameters)
else:
raise ValueError(
"loss_type {} is unsupported!".format(loss_type))
raise ValueError("loss_type {} is unsupported!".format(
loss_type))
if loss_type == "softmax":
loss = self.softmax_loss(target_audios, mix_parameters)
elif loss_type == "mix-gaussian-pdf":
loss = self.mixture_density_loss(target_audios,
mix_parameters, self.log_scale_min)
loss = self.mixture_density_loss(target_audios, mix_parameters,
self.log_scale_min)
else:
raise ValueError(
"loss_type {} is unsupported!".format(loss_type))
raise ValueError("loss_type {} is unsupported!".format(loss_type))
return loss, sample_audios
@@ -335,22 +341,23 @@ class WaveNetModule(dg.Layer):
elif loss_type == "mix-gaussian-pdf":
audio_input = self.embedding_fc(current_sample)
else:
raise ValueError(
"loss_type {} is unsupported!".format(loss_type))
raise ValueError("loss_type {} is unsupported!".format(
loss_type))
# [bs, channel, 1, 1]
audio_input = fluid.layers.unsqueeze(
fluid.layers.transpose(audio_input, perm=[0, 2, 1]), 2)
fluid.layers.transpose(
audio_input, perm=[0, 2, 1]), 2)
# [bs, mel_bands]
cond_input = conditioner[:, i, :]
# [bs, mel_bands, 1, 1]
cond_input = fluid.layers.reshape(
cond_input, cond_input.shape + [1, 1])
cond_input = fluid.layers.reshape(cond_input,
cond_input.shape + [1, 1])
skip = None
for layer in self.dilated_causal_convs:
audio_input, skip = layer.add_input(
audio_input, skip, cond_input)
audio_input, skip = layer.add_input(audio_input, skip,
cond_input)
# [bs, 1, channel]
skip = fluid.layers.transpose(
@ -361,14 +368,14 @@ class WaveNetModule(dg.Layer):
elif loss_type == "mix-gaussian-pdf":
sample = self.sample_mix_gaussian(mix_parameters)
else:
raise ValueError(
"loss_type {} is unsupported!".format(loss_type))
raise ValueError("loss_type {} is unsupported!".format(
loss_type))
audio_samples.append(sample)
# [bs]
current_sample = audio_samples[-1]
# [bs, 1, 1]
current_sample = fluid.layers.reshape(current_sample,
current_sample.shape + [1, 1])
current_sample = fluid.layers.reshape(
current_sample, current_sample.shape + [1, 1])
# syn_audio: [num_samples]
syn_audio = fluid.layers.concat(audio_samples, axis=0).numpy()
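A NumPy sketch of the `sample_mix_gaussian` logic above, assuming the same packing of the last axis as `[logits_pi | mu | log_s]` with K mixtures each:

```python
import numpy as np

def sample_mix_gaussian(mix_parameters, rng=np.random):
    K = mix_parameters.shape[-1] // 3
    logits_pi = mix_parameters[:, :K]
    mu = mix_parameters[:, K:2 * K]
    log_s = mix_parameters[:, 2 * K:3 * K]
    # softmax over mixture weights
    pi = np.exp(logits_pi - logits_pi.max(-1, keepdims=True))
    pi /= pi.sum(-1, keepdims=True)
    # pick one component per row, then draw from that gaussian
    comp = np.array([rng.choice(K, p=row) for row in pi])
    rows = np.arange(len(comp))
    return rng.normal(mu[rows, comp], np.exp(log_s[rows, comp]))

params = np.random.randn(4, 3 * 10)        # batch of 4, K = 10
print(sample_mix_gaussian(params).shape)   # (4,)
```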

View File

@@ -1,2 +1,16 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import weight_norm
from .customized import *

View File

@@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle import fluid
import paddle.fluid.layers as F
import paddle.fluid.dygraph as dg
@@ -7,6 +21,7 @@ class Pool1D(dg.Layer):
"""
A Pool 1D block implemented with Pool2D.
"""
def __init__(self,
pool_size=-1,
pool_type='max',
@@ -28,12 +43,15 @@
self.exclusive = exclusive
self.data_format = data_format
self.pool2d = dg.Pool2D([1,pool_size], pool_type = pool_type,
pool_stride = [1,pool_stride], pool_padding = [0, pool_padding],
global_pooling = global_pooling, use_cudnn = use_cudnn,
ceil_mode = ceil_mode, exclusive = exclusive)
self.pool2d = dg.Pool2D(
[1, pool_size],
pool_type=pool_type,
pool_stride=[1, pool_stride],
pool_padding=[0, pool_padding],
global_pooling=global_pooling,
use_cudnn=use_cudnn,
ceil_mode=ceil_mode,
exclusive=exclusive)
def forward(self, x):
"""
@@ -53,12 +71,14 @@
x = fluid.layers.transpose(x, [0, 2, 1])
return x
class Conv1D(dg.Conv2D):
"""A standard Conv1D layer that use (B, C, T) data layout. It inherit Conv2D and
use (B, C, 1, T) data layout to compute 1D convolution. Nothing more.
NOTE: we inherit Conv2D instead of encapsulate a Conv2D layer to make it a simple
layer, instead of a complex one. So we can easily apply weight norm to it.
"""
def __init__(self,
num_channels,
num_filters,
@@ -72,17 +92,18 @@ class Conv1D(dg.Conv2D):
use_cudnn=True,
act=None,
dtype='float32'):
super(Conv1D, self).__init__(num_channels,
num_filters, (1, filter_size),
stride=(1, stride),
padding=(0, padding),
dilation=(1, dilation),
groups=groups,
param_attr=param_attr,
bias_attr=bias_attr,
use_cudnn=use_cudnn,
act=act,
dtype=dtype)
super(Conv1D, self).__init__(
num_channels,
num_filters, (1, filter_size),
stride=(1, stride),
padding=(0, padding),
dilation=(1, dilation),
groups=groups,
param_attr=param_attr,
bias_attr=bias_attr,
use_cudnn=use_cudnn,
act=act,
dtype=dtype)
def forward(self, x):
x = F.unsqueeze(x, [2])
@@ -105,18 +126,19 @@ class Conv1DTranspose(dg.Conv2DTranspose):
use_cudnn=True,
act=None,
dtype='float32'):
super(Conv1DTranspose, self).__init__(num_channels,
num_filters, (1, filter_size),
output_size=None,
padding=(0, padding),
stride=(1, stride),
dilation=(1, dilation),
groups=groups,
param_attr=param_attr,
bias_attr=bias_attr,
use_cudnn=use_cudnn,
act=act,
dtype=dtype)
super(Conv1DTranspose, self).__init__(
num_channels,
num_filters, (1, filter_size),
output_size=None,
padding=(0, padding),
stride=(1, stride),
dilation=(1, dilation),
groups=groups,
param_attr=param_attr,
bias_attr=bias_attr,
use_cudnn=use_cudnn,
act=act,
dtype=dtype)
def forward(self, x):
x = F.unsqueeze(x, [2])
@@ -134,6 +156,7 @@ class Conv1DCell(Conv1D):
It is a cell that acts like an RNN cell. It does not support stride > 1, and it
ensures a 1-to-1 mapping from input time steps to output time steps.
"""
def __init__(self,
num_channels,
num_filters,
@@ -150,18 +173,19 @@ class Conv1DCell(Conv1D):
padding = receptive_field - 1 if causal else receptive_field // 2
self._receptive_field = receptive_field
self.causal = causal
super(Conv1DCell, self).__init__(num_channels,
num_filters,
filter_size,
stride=1,
padding=padding,
dilation=dilation,
groups=groups,
param_attr=param_attr,
bias_attr=bias_attr,
use_cudnn=use_cudnn,
act=act,
dtype=dtype)
super(Conv1DCell, self).__init__(
num_channels,
num_filters,
filter_size,
stride=1,
padding=padding,
dilation=dilation,
groups=groups,
param_attr=param_attr,
bias_attr=bias_attr,
use_cudnn=use_cudnn,
act=act,
dtype=dtype)
def forward(self, x):
# it ensures that output time steps == input time steps
@@ -189,15 +213,16 @@ class Conv1DCell(Conv1D):
def add_input(self, x_t):
batch_size, c_in, _ = x_t.shape
if self._buffer is None:
self._buffer = F.zeros((batch_size, c_in, self.receptive_field),
dtype=x_t.dtype)
self._buffer = F.zeros(
(batch_size, c_in, self.receptive_field), dtype=x_t.dtype)
self._buffer = F.concat([self._buffer[:, :, 1:], x_t], -1)
if self._dilation[1] > 1:
input = F.strided_slice(self._buffer,
axes=[2],
starts=[0],
ends=[self.receptive_field],
strides=[self._dilation[1]])
input = F.strided_slice(
self._buffer,
axes=[2],
starts=[0],
ends=[self.receptive_field],
strides=[self._dilation[1]])
else:
input = self._buffer
input = F.reshape(input, (batch_size, -1))
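The buffer logic in `Conv1DCell.add_input` is easiest to see in isolation: keep the last `receptive_field` samples, shift one step per call, and for dilation > 1 read every `dilation`-th entry, which is what the `strided_slice` above does. A NumPy sketch:

```python
import numpy as np

dilation, filter_size = 4, 2
receptive_field = 1 + dilation * (filter_size - 1)   # = 5

buf = np.zeros(receptive_field)
for t, x_t in enumerate([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]):
    buf = np.concatenate([buf[1:], [x_t]])   # shift in the newest sample
    taps = buf[::dilation]                   # dilated read: 2 taps, 4 apart
    print(t, taps)
```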

View File

@@ -1,6 +1,20 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid.dygraph as dg
import paddle.fluid.layers as layers
class DynamicGRU(dg.Layer):
def __init__(self,
size,
@@ -49,4 +63,3 @@ class DynamicGRU(dg.Layer):
res = res[::-1]
res = layers.concat(res, axis=1)
return res

View File

@@ -1,3 +1,16 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid.dygraph as dg
import paddle.fluid.layers as layers
import paddle.fluid as fluid
@@ -7,28 +20,41 @@ from parakeet.modules.customized import Conv1D
class PositionwiseFeedForward(dg.Layer):
''' A two-feed-forward-layer module '''
def __init__(self, d_in, num_hidden, filter_size, padding=0, use_cudnn=True, dropout=0.1):
def __init__(self,
d_in,
num_hidden,
filter_size,
padding=0,
use_cudnn=True,
dropout=0.1):
super(PositionwiseFeedForward, self).__init__()
self.num_hidden = num_hidden
self.use_cudnn = use_cudnn
self.dropout = dropout
k = math.sqrt(1 / d_in)
self.w_1 = Conv1D(num_channels = d_in,
num_filters = num_hidden,
filter_size = filter_size,
padding=padding,
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k)),
use_cudnn = use_cudnn)
self.w_1 = Conv1D(
num_channels=d_in,
num_filters=num_hidden,
filter_size=filter_size,
padding=padding,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-k, high=k)),
use_cudnn=use_cudnn)
k = math.sqrt(1 / num_hidden)
self.w_2 = Conv1D(num_channels = num_hidden,
num_filters = d_in,
filter_size = filter_size,
padding=padding,
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k)),
use_cudnn = use_cudnn)
self.w_2 = Conv1D(
num_channels=num_hidden,
num_filters=d_in,
filter_size=filter_size,
padding=padding,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-k, high=k)),
use_cudnn=use_cudnn)
self.layer_norm = dg.LayerNorm(d_in)
def forward(self, input):
@@ -40,14 +66,14 @@ class PositionwiseFeedForward(dg.Layer):
Returns:
output (Variable), Shape(B, T, C), the result after FFN.
"""
x = layers.transpose(input, [0,2,1])
x = layers.transpose(input, [0, 2, 1])
# FFN network
x = self.w_2(layers.relu(self.w_1(x)))
# dropout
x = layers.dropout(x, self.dropout)
x = layers.transpose(x, [0,2,1])
x = layers.transpose(x, [0, 2, 1])
# residual connection
x = x + input
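Shape-wise, the block computes `w_2(relu(w_1(x))) + x`, and the convolutions act as per-step linear maps when `filter_size` is 1. A NumPy sketch with dropout omitted (sizes are illustrative):

```python
import numpy as np

B, T, C, H = 2, 7, 64, 256                  # batch, steps, d_in, num_hidden
x = np.random.randn(B, T, C)
w1 = np.random.randn(H, C)                  # d_in -> num_hidden
w2 = np.random.randn(C, H)                  # num_hidden -> d_in

h = np.maximum(np.einsum('hc,btc->bth', w1, x), 0.0)   # 1x1 conv + relu
y = np.einsum('ch,bth->btc', w2, h) + x                # 1x1 conv + residual
print(y.shape)                              # (2, 7, 64)
```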

View File

@@ -1,29 +1,53 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.dygraph as dg
import paddle.fluid.layers as layers
class Linear(dg.Layer):
def __init__(self, in_features, out_features, is_bias=True, dtype="float32"):
def __init__(self,
in_features,
out_features,
is_bias=True,
dtype="float32"):
super(Linear, self).__init__()
self.in_features = in_features
self.out_features = out_features
self.dtype = dtype
self.weight = fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer())
self.bias = is_bias
self.weight = fluid.ParamAttr(
initializer=fluid.initializer.XavierInitializer())
self.bias = is_bias
if is_bias is not False:
k = math.sqrt(1 / in_features)
self.bias = fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k))
self.bias = fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-k, high=k))
self.linear = dg.Linear(in_features, out_features, param_attr = self.weight,
bias_attr = self.bias,)
self.linear = dg.Linear(
in_features,
out_features,
param_attr=self.weight,
bias_attr=self.bias, )
def forward(self, x):
x = self.linear(x)
return x
class ScaledDotProductAttention(dg.Layer):
def __init__(self, d_key):
super(ScaledDotProductAttention, self).__init__()
@@ -31,7 +55,13 @@ class ScaledDotProductAttention(dg.Layer):
self.d_key = d_key
# NOTE: this mask convention differs from the PyTorch implementation
def forward(self, key, value, query, mask=None, query_mask=None, dropout=0.1):
def forward(self,
key,
value,
query,
mask=None,
query_mask=None,
dropout=0.1):
"""
Scaled Dot Product Attention.
@@ -47,13 +77,14 @@ class ScaledDotProductAttention(dg.Layer):
attention (Variable), Shape(n_head * B, T, C), the attention of key.
"""
# Compute attention score
attention = layers.matmul(query, key, transpose_y=True) #transpose the last dim in y
attention = layers.matmul(
query, key, transpose_y=True) #transpose the last dim in y
attention = attention / math.sqrt(self.d_key)
# Mask key to ignore padding
if mask is not None:
attention = attention * mask
mask = (mask == 0).astype(np.float32) * (-2 ** 32 + 1)
mask = (mask == 0).astype(np.float32) * (-2**32 + 1)
attention = attention + mask
attention = layers.softmax(attention)
@@ -66,8 +97,16 @@ class ScaledDotProductAttention(dg.Layer):
result = layers.matmul(attention, value)
return result, attention
class MultiheadAttention(dg.Layer):
def __init__(self, num_hidden, d_k, d_q, num_head=4, is_bias=False, dropout=0.1, is_concat=True):
def __init__(self,
num_hidden,
d_k,
d_q,
num_head=4,
is_bias=False,
dropout=0.1,
is_concat=True):
super(MultiheadAttention, self).__init__()
self.num_hidden = num_hidden
self.num_head = num_head
@@ -109,28 +148,42 @@ class MultiheadAttention(dg.Layer):
# repeat masks h times
if query_mask is not None:
query_mask = layers.expand(query_mask, [self.num_head, 1, seq_len_key])
query_mask = layers.expand(query_mask,
[self.num_head, 1, seq_len_key])
if mask is not None:
mask = layers.expand(mask, (self.num_head, 1, 1))
# Make multihead attention
# key & value.shape = (batch_size, seq_len, feature)(feature = num_head * num_hidden_per_attn)
key = layers.reshape(self.key(key), [batch_size, seq_len_key, self.num_head, self.d_k])
value = layers.reshape(self.value(value), [batch_size, seq_len_key, self.num_head, self.d_k])
query = layers.reshape(self.query(query_input), [batch_size, seq_len_query, self.num_head, self.d_q])
key = layers.reshape(
self.key(key), [batch_size, seq_len_key, self.num_head, self.d_k])
value = layers.reshape(
self.value(value),
[batch_size, seq_len_key, self.num_head, self.d_k])
query = layers.reshape(
self.query(query_input),
[batch_size, seq_len_query, self.num_head, self.d_q])
key = layers.reshape(layers.transpose(key, [2, 0, 1, 3]), [-1, seq_len_key, self.d_k])
value = layers.reshape(layers.transpose(value, [2, 0, 1, 3]), [-1, seq_len_key, self.d_k])
query = layers.reshape(layers.transpose(query, [2, 0, 1, 3]), [-1, seq_len_query, self.d_q])
key = layers.reshape(
layers.transpose(key, [2, 0, 1, 3]), [-1, seq_len_key, self.d_k])
value = layers.reshape(
layers.transpose(value, [2, 0, 1, 3]),
[-1, seq_len_key, self.d_k])
query = layers.reshape(
layers.transpose(query, [2, 0, 1, 3]),
[-1, seq_len_query, self.d_q])
result, attention = self.scal_attn(key, value, query, mask=mask, query_mask=query_mask)
result, attention = self.scal_attn(
key, value, query, mask=mask, query_mask=query_mask)
# concat all multihead result
result = layers.reshape(result, [self.num_head, batch_size, seq_len_query, self.d_q])
result = layers.reshape(layers.transpose(result, [1,2,0,3]),[batch_size, seq_len_query, -1])
result = layers.reshape(
result, [self.num_head, batch_size, seq_len_query, self.d_q])
result = layers.reshape(
layers.transpose(result, [1, 2, 0, 3]),
[batch_size, seq_len_query, -1])
if self.is_concat:
result = layers.concat([query_input,result], axis=-1)
result = layers.concat([query_input, result], axis=-1)
result = layers.dropout(self.fc(result), self.dropout)
result = result + query_input
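The masking convention above is worth unpacking: scores are first multiplied by the 0/1 mask, then a very large negative number (-2^32 + 1) is added at masked positions so that softmax drives them to ~0. A NumPy sketch of the scaled dot-product core:

```python
import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

B, Tq, Tk, d = 2, 4, 6, 8
q, k, v = (np.random.randn(B, n, d) for n in (Tq, Tk, Tk))
mask = np.ones((B, Tq, Tk))
mask[:, :, -2:] = 0                          # hide the last two key positions

scores = q @ k.transpose(0, 2, 1) / np.sqrt(d)
scores = scores * mask + (mask == 0) * (-2.0 ** 32 + 1)
attn = softmax(scores)
print(attn[0, 0, -2:])                       # ~0 at masked keys
print((attn @ v).shape)                      # (2, 4, 8)
```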

View File

@@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from paddle import fluid
import paddle.fluid.dygraph as dg

View File

@@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from torch import nn
import paddle.fluid.dygraph as dg
@@ -10,8 +24,8 @@ def summary(layer):
print("{}|{}|{}".format(name, param.shape, np.prod(param.shape)))
num_elements += np.prod(param.shape)
num_params += 1
print("layer has {} parameters, {} elements.".format(
num_params, num_elements))
print("layer has {} parameters, {} elements.".format(num_params,
num_elements))
def freeze(layer):
@@ -31,5 +45,5 @@ def torch_summary(layer):
print("{}|{}|{}".format(name, param.shape, np.prod(param.shape)))
num_elements += np.prod(param.shape)
num_params += 1
print("layer has {} parameters, {} elements.".format(
num_params, num_elements))
print("layer has {} parameters, {} elements.".format(num_params,
num_elements))

View File

@@ -1,13 +1,27 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import io
import re
from setuptools import setup, find_packages
def read(*names, **kwargs):
with io.open(
os.path.join(os.path.dirname(__file__), *names),
encoding=kwargs.get("encoding", "utf8")
) as fp:
os.path.join(os.path.dirname(__file__), *names),
encoding=kwargs.get("encoding", "utf8")) as fp:
return fp.read()
@@ -19,6 +33,7 @@ def find_version(*file_paths):
return version_match.group(1)
raise RuntimeError("Unable to find version string.")
VERSION = find_version('parakeet', '__init__.py')
long_description = read('README.md')
@@ -32,17 +47,26 @@ setup_info = dict(
description='Speech synthesis tools and models based on Paddlepaddle',
long_description=long_description,
license='Apache 2',
install_requires=[
'numpy', 'nltk', 'inflect', 'librosa', 'unidecode', 'numba',
'tqdm', 'matplotlib', 'tensorboardX', 'tensorboard', 'scipy',
'ruamel.yaml', 'pandas', 'sox', 'soundfile',
'numpy',
'nltk',
'inflect',
'librosa',
'unidecode',
'numba',
'tqdm',
'matplotlib',
'tensorboardX',
'tensorboard',
'scipy',
'ruamel.yaml',
'pandas',
'sox',
'soundfile',
],
# Package info
packages=find_packages(exclude=('tests', 'tests.*')),
zip_safe=True,
)
zip_safe=True, )
setup(**setup_info)

View File

@@ -1,3 +1,17 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from parakeet.datasets.ljspeech import LJSpeech
from parakeet.data.datacargo import DataCargo

View File

@@ -1,11 +1,25 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from parakeet.datasets import vctk
from pathlib import Path
from parakeet.data.datacargo import DataCargo
root = Path("/workspace/datasets/VCTK-Corpus")
vctk_dataset = vctk.VCTK(root)
vctk_cargo = DataCargo(vctk_dataset, batch_size=16, shuffle=True, drop_last=True)
vctk_cargo = DataCargo(
vctk_dataset, batch_size=16, shuffle=True, drop_last=True)
for i, batch in enumerate(vctk_cargo):
print(i)

tools/copyright.hook (new normal file, 121 lines)
View File

@@ -0,0 +1,121 @@
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import io, re
import sys, os
import subprocess
import platform
COPYRIGHT = '''
Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
LANG_COMMENT_MARK = None
NEW_LINE_MARK = None
COPYRIGHT_HEADER = None
if platform.system() == "Windows":
NEW_LINE_MARK = "\r\n"
else:
NEW_LINE_MARK = '\n'
COPYRIGHT_HEADER = COPYRIGHT.split(NEW_LINE_MARK)[1]
p = re.search('(\d{4})', COPYRIGHT_HEADER).group(0)
process = subprocess.Popen(["date", "+%Y"], stdout=subprocess.PIPE)
date, err = process.communicate()
date = date.decode("utf-8").rstrip("\n")
COPYRIGHT_HEADER = COPYRIGHT_HEADER.replace(p, date)
def generate_copyright(template, lang='C'):
if lang == 'Python':
LANG_COMMENT_MARK = '#'
else:
LANG_COMMENT_MARK = "//"
lines = template.split(NEW_LINE_MARK)
BLANK = " "
ans = LANG_COMMENT_MARK + BLANK + COPYRIGHT_HEADER + NEW_LINE_MARK
for lino, line in enumerate(lines):
if lino == 0 or lino == 1 or lino == len(lines) - 1: continue
if len(line) == 0:
BLANK = ""
else:
BLANK = " "
ans += LANG_COMMENT_MARK + BLANK + line + NEW_LINE_MARK
return ans + "\n"
def lang_type(filename):
if filename.endswith(".py"):
return "Python"
elif filename.endswith(".h"):
return "C"
elif filename.endswith(".c"):
return "C"
elif filename.endswith(".hpp"):
return "C"
elif filename.endswith(".cc"):
return "C"
elif filename.endswith(".cpp"):
return "C"
elif filename.endswith(".cu"):
return "C"
elif filename.endswith(".cuh"):
return "C"
elif filename.endswith(".go"):
return "C"
elif filename.endswith(".proto"):
return "C"
else:
print("Unsupported filetype %s", filename)
exit(0)
PYTHON_ENCODE = re.compile("^[ \t\v]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)")
def main(argv=None):
parser = argparse.ArgumentParser(
description='Checker for copyright declaration.')
parser.add_argument('filenames', nargs='*', help='Filenames to check')
args = parser.parse_args(argv)
retv = 0
for filename in args.filenames:
fd = io.open(filename, encoding="utf-8")
first_line = fd.readline()
second_line = fd.readline()
if "COPYRIGHT (C)" in first_line.upper(): continue
if first_line.startswith("#!") or PYTHON_ENCODE.match(
second_line) != None or PYTHON_ENCODE.match(first_line) != None:
continue
original_contents = io.open(filename, encoding="utf-8").read()
new_contents = generate_copyright(
COPYRIGHT, lang_type(filename)) + original_contents
print('Auto Insert Copyright Header {}'.format(filename))
retv = 1
with io.open(filename, 'w') as output_file:
output_file.write(new_contents)
return retv
if __name__ == '__main__':
exit(main())
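For a Python file, the hook's `generate_copyright` step amounts to prefixing each notice line with the comment mark (blank lines get a bare mark). A simplified standalone sketch, without the year substitution or platform handling:

```python
NOTICE = """Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");"""

def comment_block(text, mark="#"):
    out = []
    for line in text.split("\n"):
        out.append(mark if not line else "{} {}".format(mark, line))
    return "\n".join(out) + "\n\n"

print(comment_block(NOTICE))
```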