diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..7230df7
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,28 @@
+# This file is used by clang-format to autoformat paddle source code
+#
+# clang-format is part of the llvm toolchain.
+# llvm and clang need to be installed before source code can be formatted.
+#
+# The basic usage is,
+#   clang-format -i -style=file PATH/TO/SOURCE/CODE
+#
+# The -style=file option implicitly uses the ".clang-format" file located in
+# one of the parent directories.
+# The -i option means the files are changed in place.
+#
+# The document of clang-format is
+#   http://clang.llvm.org/docs/ClangFormat.html
+#   http://clang.llvm.org/docs/ClangFormatStyleOptions.html
+---
+Language:        Cpp
+BasedOnStyle:  Google
+IndentWidth:     4
+TabWidth:        4
+ContinuationIndentWidth: 4
+MaxEmptyLinesToKeep: 2
+AccessModifierOffset: -2  # private/protected/public are indented 2 less than class members
+Standard:  Cpp11
+AllowAllParametersOfDeclarationOnNextLine: true
+BinPackParameters: false
+BinPackArguments: false
+...
\ No newline at end of file
diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..3723df5
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,50 @@
+[flake8]
+
+########## OPTIONS ##########
+# Set the maximum length that any line (with some exceptions) may be.
+max-line-length = 120
+
+
+################### FILE PATTERNS ##########################
+# Provide a comma-separated list of glob patterns to exclude from checks.
+exclude =
+    # git folder
+    .git,
+    # python cache
+    __pycache__,
+    third_party/,
+# Provide a comma-separated list of glob patterns to include for checks.
+filename =
+    *.py
+
+
+########## RULES ##########
+
+# ERROR CODES
+#
+# E/W  - PEP8 errors/warnings (pycodestyle)
+# F    - linting errors (pyflakes)
+# C    - McCabe complexity error (mccabe)
+#
+# W503 - line break before binary operator
+
+# Specify a list of codes to ignore.
+ignore =
+    W503
+    E252,E262,E127,E265,E126,E266,E241,E261,E128,E125
+    W291,W293,W605
+    E203,E305,E402,E501,E721,E741,F403,F405,F821,F841,F999,W503,W504,C408,E302,W291,E303,
+    # shebang has extra meaning in fbcode lints, so I think it's not worth trying
+    # to line this up with executable bit
+    EXE001,
+    # these ignores are from flake8-bugbear; please fix!
+    B007,B008,
+    # these ignores are from flake8-comprehensions; please fix!
+    C400,C401,C402,C403,C404,C405,C407,C411,C413,C414,C415
+
+# Specify the list of error codes you wish Flake8 to report.
+select =
+    E,
+    W,
+    F,
+    C
\ No newline at end of file
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 6f222bb..cde2cc0 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,11 +1,11 @@
-repos:
--   repo: https://github.com/PaddlePaddle/mirrors-yapf.git
-    rev: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37
+-   repo: https://github.com/pre-commit/mirrors-yapf.git
+    sha: v0.16.0
     hooks:
     -   id: yapf
         files: \.py$
+        exclude: (?=third_party).*(\.py)$
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: a11d9314b22d8f8c7556443875b731ef05965464
+    sha: a11d9314b22d8f8c7556443875b731ef05965464
     hooks:
     -   id: check-merge-conflict
     -   id: check-symlinks
@@ -15,8 +15,23 @@ repos:
         files: \.md$
     -   id: trailing-whitespace
         files: \.md$
--   repo: https://github.com/Lucas-C/pre-commit-hooks
-    rev: v1.0.1
+    -   id: requirements-txt-fixer
+        exclude: (?=third_party).*$
+    -   id: check-yaml
+    -   id: check-json
+    -   id: pretty-format-json
+        args:
+        - --no-sort-keys
+        - --autofix
+    -   id: check-merge-conflict
+    -   id: flake8
+        args:
+        -  --ignore=E501,E228,E226,E261,E266,E128,E402,W503
+        -  --builtins=G,request
+        -  --jobs=1
+        exclude: (?=third_party).*(\.py)$
+-   repo : https://github.com/Lucas-C/pre-commit-hooks
+    sha: v1.0.1
     hooks:
     -   id: forbid-crlf
         files: \.md$
@@ -28,9 +43,15 @@ repos:
         files: \.md$
 -   repo: local
     hooks:
+    -   id: clang-format
+        name: clang-format
+        description: Format files with ClangFormat
+        entry: bash .pre-commit-hooks/clang-format.hook -i
+        language: system
+        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$
     -   id: copyright_checker
         name: copyright_checker
-        entry: python ./tools/copyright.hook
+        entry: python .pre-commit-hooks/copyright-check.hook
         language: system
         files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
-        exclude: (?!.*third_party)^.*$ | (?!.*book)^.*$
+        exclude: (?=third_party|pypinyin).*(\.cpp|\.h|\.py)$
diff --git a/.pre-commit-hooks/clang-format.hook b/.pre-commit-hooks/clang-format.hook
new file mode 100755
index 0000000..ceb4a7e
--- /dev/null
+++ b/.pre-commit-hooks/clang-format.hook
@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+set -e
+
+readonly VERSION="3.9"
+
+version=$(clang-format -version)
+
+# if ! [[ $version == *"$VERSION"* ]]; then
+#     echo "clang-format version check failed."
+#     echo "a version contains '$VERSION' is needed, but get '$version'"
+#     echo "you can install the right version, and make an soft-link to '\$PATH' env"
+#     exit -1
+# fi
+# Quote "$@" so paths with spaces or glob characters are passed intact.
+clang-format "$@"
diff --git a/.pre-commit-hooks/copyright-check.hook b/.pre-commit-hooks/copyright-check.hook
new file mode 100644
index 0000000..80a5315
--- /dev/null
+++ b/.pre-commit-hooks/copyright-check.hook
@@ -0,0 +1,133 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import io
+import os
+import re
+import sys
+import subprocess
+import platform
+
+COPYRIGHT = '''
+Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+LANG_COMMENT_MARK = None
+
+NEW_LINE_MARK = None
+
+COPYRIGHT_HEADER = None
+
+# Templates are Python literals, so they are always split/joined on "\n";
+# text-mode writes translate that to the platform newline by themselves.
+NEW_LINE_MARK = '\n'
+
+# Header line of COPYRIGHT stamped with the current year. Built on every
+# platform: it used to stay None on Windows, which broke generate_copyright.
+import datetime  # stdlib; kept local so this block is self-contained
+
+COPYRIGHT_HEADER = re.sub(r'\d{4}', str(datetime.date.today().year),
+                          COPYRIGHT.split(NEW_LINE_MARK)[1], count=1)
+
+
+def generate_copyright(template, lang='C'):
+    """Render the license `template` as a comment block for `lang`.
+
+    'Python' uses '#' as the comment mark, any other `lang` uses '//'. The
+    block opens with the module-level COPYRIGHT_HEADER, followed by the
+    template lines except the first two and the last one, and ends with an
+    extra blank line.
+    """
+    mark = '#' if lang == 'Python' else "//"
+    # [2:-1] keeps exactly the lines whose index is not 0, 1 or the last.
+    kept = template.split(NEW_LINE_MARK)[2:-1]
+    pieces = [mark + " " + COPYRIGHT_HEADER + NEW_LINE_MARK]
+    for text in kept:
+        # Empty lines get the bare mark, avoiding trailing whitespace.
+        gap = " " if text else ""
+        pieces.append(mark + gap + text + NEW_LINE_MARK)
+
+    return "".join(pieces) + "\n"
+
+
+def lang_type(filename):
+    """Classify `filename` by the comment style its header needs.
+
+    Returns "Python" for names ending in ``.py`` and "C" for names ending
+    in one of the other supported extensions. For any other name a message
+    is printed and the process exits with status 0, which ends the run
+    without flagging a failure.
+    """
+    # .cxx and .hxx are included because the copyright_checker `files`
+    # pattern in .pre-commit-config.yaml passes them to this script; they
+    # used to fall through to the "unsupported" exit below.
+    c_family = (".h", ".hpp", ".hxx", ".c", ".cc", ".cpp", ".cxx", ".cu",
+                ".cuh", ".go", ".proto")
+
+    if filename.endswith(".py"):
+        return "Python"
+    if filename.endswith(c_family):
+        return "C"
+
+    # The name was previously passed as a second print() argument, so the
+    # "%s" placeholder was printed literally.
+    print("Unsupported filetype %s" % filename)
+    # sys.exit instead of the site-provided exit(); status stays 0.
+    sys.exit(0)
+
+
+PYTHON_ENCODE = re.compile("^[ \t\v]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)")
+
+
+def main(argv=None):
+    """Add the copyright header to files lacking it; return 1 if any changed."""
+    parser = argparse.ArgumentParser(
+        description='Checker for copyright declaration.')
+    parser.add_argument('filenames', nargs='*', help='Filenames to check')
+    args = parser.parse_args(argv)
+    retv = 0
+    for filename in args.filenames:
+        with io.open(filename, encoding="utf-8") as fd:  # no leaked handle
+            first_line, second_line = fd.readline(), fd.readline()
+            original_contents = first_line + second_line + fd.read()
+        # Leave files alone if they already carry a notice, or start with a
+        # shebang / source-encoding declaration.
+        if ("COPYRIGHT (C)" in first_line.upper() or first_line.startswith("#!")
+                or PYTHON_ENCODE.match(first_line) is not None
+                or PYTHON_ENCODE.match(second_line) is not None):
+            continue
+        header = generate_copyright(COPYRIGHT, lang_type(filename))
+        print('Auto Insert Copyright Header {}'.format(filename))
+        retv = 1
+        with io.open(filename, 'w', encoding="utf-8") as output_file:
+            output_file.write(header + original_contents)
+
+    return retv
+
+
+if __name__ == '__main__':
+    sys.exit(main())  # sys.exit: the bare exit() helper comes from `site`
diff --git a/.style.yapf b/.style.yapf
new file mode 100644
index 0000000..b62febf
--- /dev/null
+++ b/.style.yapf
@@ -0,0 +1,3 @@
+[style]
+based_on_style = pep8
+column_limit = 80
\ No newline at end of file
diff --git a/docs/source/conf.py b/docs/source/conf.py
index bddd217..c7afa7a 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -11,15 +11,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 # Configuration file for the Sphinx documentation builder.
 #
 # This file only contains a selection of the most common options. For a full
 # list see the documentation:
 # https://www.sphinx-doc.org/en/master/usage/configuration.html
-
 # -- Path setup --------------------------------------------------------------
-
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
diff --git a/examples/fastspeech2/baker/batch_fn.py b/examples/fastspeech2/baker/batch_fn.py
index 1bbab84..0dd93dd 100644
--- a/examples/fastspeech2/baker/batch_fn.py
+++ b/examples/fastspeech2/baker/batch_fn.py
@@ -11,9 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import numpy as np
 import paddle
+
 from parakeet.data.batch import batch_sequences
 
 
@@ -24,8 +24,7 @@ def collate_baker_examples(examples):
     pitch = [np.array(item["pitch"], dtype=np.float32) for item in examples]
     energy = [np.array(item["energy"], dtype=np.float32) for item in examples]
     durations = [
-        np.array(
-            item["durations"], dtype=np.int64) for item in examples
+        np.array(item["durations"], dtype=np.int64) for item in examples
     ]
     text_lengths = np.array([item["text_lengths"] for item in examples])
     speech_lengths = np.array([item["speech_lengths"] for item in examples])
@@ -54,4 +53,4 @@ def collate_baker_examples(examples):
         "pitch": pitch,
         "energy": energy
     }
-    return batch
\ No newline at end of file
+    return batch
diff --git a/examples/fastspeech2/baker/compute_statistics.py b/examples/fastspeech2/baker/compute_statistics.py
index aa4bf4f..823223a 100644
--- a/examples/fastspeech2/baker/compute_statistics.py
+++ b/examples/fastspeech2/baker/compute_statistics.py
@@ -12,18 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Calculate statistics of feature files."""
-
 import argparse
 import logging
 from pathlib import Path
 
 import jsonlines
 import numpy as np
-from parakeet.datasets.data_table import DataTable
+from config import get_cfg_default
 from sklearn.preprocessing import StandardScaler
 from tqdm import tqdm
 
-from config import get_cfg_default
+from parakeet.datasets.data_table import DataTable
 
 
 def main():
@@ -75,8 +74,8 @@ def main():
 
     # check directory existence
     if args.output is None:
-        args.output = Path(args.metadata).parent.with_name(args.field_name +
-                                                           "_stats.npy")
+        args.output = Path(
+            args.metadata).parent.with_name(args.field_name + "_stats.npy")
     else:
         args.output = Path(args.output)
     args.output.parent.mkdir(parents=True, exist_ok=True)
diff --git a/examples/fastspeech2/baker/config.py b/examples/fastspeech2/baker/config.py
index 7cf3d95..500f5bd 100644
--- a/examples/fastspeech2/baker/config.py
+++ b/examples/fastspeech2/baker/config.py
@@ -11,11 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 from pathlib import Path
 
-from yacs.config import CfgNode as Configuration
 import yaml
+from yacs.config import CfgNode as Configuration
 
 config_path = (Path(__file__).parent / "conf" / "default.yaml").resolve()
 
diff --git a/examples/fastspeech2/baker/fastspeech2_updater.py b/examples/fastspeech2/baker/fastspeech2_updater.py
index 884efda..e10620b 100644
--- a/examples/fastspeech2/baker/fastspeech2_updater.py
+++ b/examples/fastspeech2/baker/fastspeech2_updater.py
@@ -11,8 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from parakeet.models.fastspeech2 import FastSpeech2, FastSpeech2Loss
+from parakeet.models.fastspeech2 import FastSpeech2Loss
 from parakeet.training.extensions.evaluator import StandardEvaluator
 from parakeet.training.reporter import report
 from parakeet.training.updaters.standard_updater import StandardUpdater
diff --git a/examples/fastspeech2/baker/frontend.py b/examples/fastspeech2/baker/frontend.py
index 3ed7efb..4a2f2c6 100644
--- a/examples/fastspeech2/baker/frontend.py
+++ b/examples/fastspeech2/baker/frontend.py
@@ -11,10 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import re
+
 import numpy as np
 import paddle
+
 from parakeet.frontend.cn_frontend import Frontend as cnFrontend
 
 
@@ -87,8 +88,7 @@ class Frontend():
                     phones.append(phone)
         return phones, tones
 
-    def get_input_ids(self, sentence, merge_sentences=True,
-                      get_tone_ids=False):
+    def get_input_ids(self, sentence, merge_sentences=True, get_tone_ids=False):
         phonemes = self.frontend.get_phonemes(
             sentence, merge_sentences=merge_sentences)
         result = {}
diff --git a/examples/fastspeech2/baker/gen_duration_from_textgrid.py b/examples/fastspeech2/baker/gen_duration_from_textgrid.py
index b3a39d3..aaece61 100644
--- a/examples/fastspeech2/baker/gen_duration_from_textgrid.py
+++ b/examples/fastspeech2/baker/gen_duration_from_textgrid.py
@@ -11,16 +11,14 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import argparse
 import os
 from pathlib import Path
 
 import librosa
 import numpy as np
-from praatio import tgio
-
 from config import get_cfg_default
+from praatio import tgio
 
 
 def readtg(config, tg_path):
diff --git a/examples/fastspeech2/baker/normalize.py b/examples/fastspeech2/baker/normalize.py
index adaa0ab..6e59744 100644
--- a/examples/fastspeech2/baker/normalize.py
+++ b/examples/fastspeech2/baker/normalize.py
@@ -50,10 +50,7 @@ def main():
         required=True,
         help="speech statistics file.")
     parser.add_argument(
-        "--pitch-stats",
-        type=str,
-        required=True,
-        help="pitch statistics file.")
+        "--pitch-stats", type=str, required=True, help="pitch statistics file.")
     parser.add_argument(
         "--energy-stats",
         type=str,
diff --git a/examples/fastspeech2/baker/preprocess.py b/examples/fastspeech2/baker/preprocess.py
index a8b597e..c28e280 100644
--- a/examples/fastspeech2/baker/preprocess.py
+++ b/examples/fastspeech2/baker/preprocess.py
@@ -262,10 +262,7 @@ def main():
     parser = argparse.ArgumentParser(
         description="Preprocess audio and then extract features.")
     parser.add_argument(
-        "--rootdir",
-        default=None,
-        type=str,
-        help="directory to baker dataset.")
+        "--rootdir", default=None, type=str, help="directory to baker dataset.")
     parser.add_argument(
         "--dur-file",
         default=None,
diff --git a/examples/fastspeech2/baker/synthesize.py b/examples/fastspeech2/baker/synthesize.py
index 6770189..f6304eb 100644
--- a/examples/fastspeech2/baker/synthesize.py
+++ b/examples/fastspeech2/baker/synthesize.py
@@ -67,8 +67,7 @@ def evaluate(args, fastspeech2_config, pwg_config):
     std = paddle.to_tensor(std)
     pwg_normalizer = ZScore(mu, std)
 
-    fastspeech2_inferencce = FastSpeech2Inference(fastspeech2_normalizer,
-                                                  model)
+    fastspeech2_inferencce = FastSpeech2Inference(fastspeech2_normalizer, model)
     pwg_inference = PWGInference(pwg_normalizer, vocoder)
 
     output_dir = Path(args.output_dir)
diff --git a/examples/fastspeech2/baker/train.py b/examples/fastspeech2/baker/train.py
index 741678b..39b6fbc 100644
--- a/examples/fastspeech2/baker/train.py
+++ b/examples/fastspeech2/baker/train.py
@@ -154,8 +154,7 @@ def train_sp(args, config):
     output_dir = Path(args.output_dir)
     trainer = Trainer(updater, (config.max_epoch, 'epoch'), output_dir)
 
-    evaluator = FastSpeech2Evaluator(model, dev_dataloader,
-                                     **config["updater"])
+    evaluator = FastSpeech2Evaluator(model, dev_dataloader, **config["updater"])
 
     if dist.get_rank() == 0:
         trainer.extend(evaluator, trigger=(1, "epoch"))
diff --git a/examples/ge2e/audio_processor.py b/examples/ge2e/audio_processor.py
index 65a6aee..921e999 100644
--- a/examples/ge2e/audio_processor.py
+++ b/examples/ge2e/audio_processor.py
@@ -30,9 +30,7 @@ except ModuleNotFoundError:
 INT16_MAX = (2**15) - 1
 
 
-def normalize_volume(wav,
-                     target_dBFS,
-                     increase_only=False,
+def normalize_volume(wav, target_dBFS, increase_only=False,
                      decrease_only=False):
     # this function implements Loudness normalization, instead of peak
     # normalization, See https://en.wikipedia.org/wiki/Audio_normalization
@@ -44,8 +42,9 @@ def normalize_volume(wav,
     if increase_only and decrease_only:
         raise ValueError("Both increase only and decrease only are set")
     dBFS_change = target_dBFS - 10 * np.log10(np.mean(wav**2))
-    if ((dBFS_change < 0 and increase_only) or
-        (dBFS_change > 0 and decrease_only)):
+    if dBFS_change < 0 and increase_only:
+        return wav
+    if dBFS_change > 0 and decrease_only:
         return wav
     gain = 10**(dBFS_change / 20)
     return wav * gain
@@ -59,9 +58,14 @@ def trim_long_silences(wav,
     """
     Ensures that segments without voice in the waveform remain no longer than a
     threshold determined by the VAD parameters in params.py.
-
-    :param wav: the raw waveform as a numpy array of floats
-    :return: the same waveform with silences trimmed away (length <= original wav length)
+    Parameters
+    ----------
+    wav : np.array
+        the raw waveform as a numpy array of floats
+    Returns
+    ----------
+    np.array
+        the same waveform with silences trimmed away (length <= original wav length)
     """
     # Compute the voice detection window size
     samples_per_window = (vad_window_length * sampling_rate) // 1000
@@ -117,20 +121,25 @@ def compute_partial_slices(n_samples: int,
 
     The returned ranges may be indexing further than the length of the waveform. It is
     recommended that you pad the waveform with zeros up to wave_slices[-1].stop.
+    Parameters
+    ----------
+    n_samples : int
+        the number of samples in the waveform.
+    partial_utterance_n_frames : int
+        the number of mel spectrogram frames in each partial utterance.
 
-    :param n_samples: the number of samples in the waveform
-    :param partial_utterance_n_frames: the number of mel spectrogram frames in each partial
-    utterance
-    :param min_pad_coverage: when reaching the last partial utterance, it may or may not have
-    enough frames. If at least <min_pad_coverage> of <partial_utterance_n_frames> are present,
-    then the last partial utterance will be considered, as if we padded the audio. Otherwise,
-    it will be discarded, as if we trimmed the audio. If there aren't enough frames for 1 partial
-    utterance, this parameter is ignored so that the function always returns at least 1 slice.
-    :param overlap: by how much the partial utterance should overlap. If set to 0, the partial
-    utterances are entirely disjoint.
-    :return: the waveform slices and mel spectrogram slices as lists of array slices. Index
-    respectively the waveform and the mel spectrogram with these slices to obtain the partial
-    utterances.
+    min_pad_coverage : float
+        when reaching the last partial utterance, it may or may not have enough frames.
+        If at least <min_pad_coverage> of <partial_utterance_n_frames> are present,
+        then the last partial utterance will be considered, as if we padded the audio. Otherwise,
+        it will be discarded, as if we trimmed the audio. If there aren't enough frames for 1 partial
+        utterance, this parameter is ignored so that the function always returns at least 1 slice.
+    overlap : float
+        by how much the partial utterance should overlap. If set to 0, the partial utterances are entirely disjoint.
+    Returns
+    ----------
+        the waveform slices and mel spectrogram slices as lists of array slices.
+        Index respectively the waveform and the mel spectrogram with these slices to obtain the partial utterances.
     """
     assert 0 <= overlap < 1
     assert 0 < min_pad_coverage <= 1
@@ -138,8 +147,8 @@ def compute_partial_slices(n_samples: int,
     # librosa's function to compute num_frames from num_samples
     n_frames = int(np.ceil((n_samples + 1) / hop_length))
     # frame shift between ajacent partials
-    frame_step = max(
-        1, int(np.round(partial_utterance_n_frames * (1 - overlap))))
+    frame_step = max(1,
+                     int(np.round(partial_utterance_n_frames * (1 - overlap))))
 
     # Compute the slices
     wav_slices, mel_slices = [], []
diff --git a/examples/ge2e/dataset_processors.py b/examples/ge2e/dataset_processors.py
index 5e4fed9..50a8f3e 100644
--- a/examples/ge2e/dataset_processors.py
+++ b/examples/ge2e/dataset_processors.py
@@ -57,7 +57,7 @@ def _process_speaker(speaker_dir: Path,
         try:
             with sources_fpath.open("rt") as sources_file:
                 existing_names = {line.split(",")[0] for line in sources_file}
-        except:
+        except Exception as e:
             existing_names = {}
     else:
         existing_names = {}
@@ -114,9 +114,7 @@ def process_librispeech(processor,
                      output_dir, "*.flac", skip_existing)
 
 
-def process_voxceleb1(processor,
-                      datasets_root,
-                      output_dir,
+def process_voxceleb1(processor, datasets_root, output_dir,
                       skip_existing=False):
     dataset_name = "VoxCeleb1"
     dataset_root = datasets_root / dataset_name
@@ -126,10 +124,7 @@ def process_voxceleb1(processor,
         metadata = [line.strip().split("\t") for line in metafile][1:]
 
     # speaker id -> nationality
-    nationalities = {
-        line[0]: line[3]
-        for line in metadata if line[-1] == "dev"
-    }
+    nationalities = {line[0]: line[3] for line in metadata if line[-1] == "dev"}
     keep_speaker_ids = [
         speaker_id for speaker_id, nationality in nationalities.items()
         if nationality.lower() in anglophone_nationalites
@@ -147,9 +142,7 @@ def process_voxceleb1(processor,
                      output_dir, "*.wav", skip_existing)
 
 
-def process_voxceleb2(processor,
-                      datasets_root,
-                      output_dir,
+def process_voxceleb2(processor, datasets_root, output_dir,
                       skip_existing=False):
     dataset_name = "VoxCeleb2"
     dataset_root = datasets_root / dataset_name
@@ -171,9 +164,7 @@ def process_aidatatang_200zh(processor,
                      output_dir, "*.wav", skip_existing)
 
 
-def process_magicdata(processor,
-                      datasets_root,
-                      output_dir,
+def process_magicdata(processor, datasets_root, output_dir,
                       skip_existing=False):
     dataset_name = "magicdata/train"
     dataset_root = datasets_root / dataset_name
diff --git a/examples/ge2e/preprocess.py b/examples/ge2e/preprocess.py
index 615a71e..b1e5946 100644
--- a/examples/ge2e/preprocess.py
+++ b/examples/ge2e/preprocess.py
@@ -52,7 +52,8 @@ if __name__ == "__main__":
     if not args.no_trim:
         try:
             import webrtcvad
-        except:
+            print(webrtcvad.__version__)
+        except Exception as e:
             raise ModuleNotFoundError(
                 "Package 'webrtcvad' not found. This package enables "
                 "noise removal and is recommended. Please install and "
@@ -96,5 +97,5 @@ if __name__ == "__main__":
 
     for dataset in args.datasets:
         print("Preprocessing %s" % dataset)
-        preprocess_func[dataset](processor, args.datasets_root,
-                                 args.output_dir, args.skip_existing)
+        preprocess_func[dataset](processor, args.datasets_root, args.output_dir,
+                                 args.skip_existing)
diff --git a/examples/ge2e/train.py b/examples/ge2e/train.py
index f015472..950d486 100644
--- a/examples/ge2e/train.py
+++ b/examples/ge2e/train.py
@@ -83,12 +83,11 @@ class Ge2eExperiment(ExperimentBase):
         self.logger.info(msg)
 
         if dist.get_rank() == 0:
-            self.visualizer.add_scalar("train/loss", loss_value,
-                                       self.iteration)
+            self.visualizer.add_scalar("train/loss", loss_value, self.iteration)
             self.visualizer.add_scalar("train/eer", eer, self.iteration)
-            self.visualizer.add_scalar(
-                "param/w",
-                float(self.model_core.similarity_weight), self.iteration)
+            self.visualizer.add_scalar("param/w",
+                                       float(self.model_core.similarity_weight),
+                                       self.iteration)
             self.visualizer.add_scalar("param/b",
                                        float(self.model_core.similarity_bias),
                                        self.iteration)
diff --git a/examples/parallelwave_gan/baker/batch_fn.py b/examples/parallelwave_gan/baker/batch_fn.py
index 11a45c5..925303b 100644
--- a/examples/parallelwave_gan/baker/batch_fn.py
+++ b/examples/parallelwave_gan/baker/batch_fn.py
@@ -109,8 +109,7 @@ class Clip(object):
 
         """
         if len(x) < c.shape[1] * self.hop_size:
-            x = np.pad(x, (0, c.shape[1] * self.hop_size - len(x)),
-                       mode="edge")
+            x = np.pad(x, (0, c.shape[1] * self.hop_size - len(x)), mode="edge")
 
         # check the legnth is valid
         assert len(x) == c.shape[
diff --git a/examples/parallelwave_gan/baker/compute_statistics.py b/examples/parallelwave_gan/baker/compute_statistics.py
index 06b9b65..2a0c458 100644
--- a/examples/parallelwave_gan/baker/compute_statistics.py
+++ b/examples/parallelwave_gan/baker/compute_statistics.py
@@ -17,18 +17,12 @@ import argparse
 import logging
 import os
 
-import numpy as np
-import yaml
-import json
 import jsonlines
-
+import numpy as np
+from parakeet.datasets.data_table import DataTable
 from sklearn.preprocessing import StandardScaler
 from tqdm import tqdm
 
-from parakeet.datasets.data_table import DataTable
-from parakeet.utils.h5_utils import read_hdf5
-from parakeet.utils.h5_utils import write_hdf5
-
 from config import get_cfg_default
 
 
diff --git a/examples/parallelwave_gan/baker/normalize.py b/examples/parallelwave_gan/baker/normalize.py
index 0cf2841..06a3dd2 100644
--- a/examples/parallelwave_gan/baker/normalize.py
+++ b/examples/parallelwave_gan/baker/normalize.py
@@ -15,18 +15,15 @@
 
 import argparse
 import logging
-import os
 from operator import itemgetter
 from pathlib import Path
 
-import numpy as np
-import yaml
 import jsonlines
+import numpy as np
+from parakeet.datasets.data_table import DataTable
 from sklearn.preprocessing import StandardScaler
 from tqdm import tqdm
 
-from parakeet.datasets.data_table import DataTable
-
 from config import get_cfg_default
 
 
diff --git a/examples/parallelwave_gan/baker/preprocess.py b/examples/parallelwave_gan/baker/preprocess.py
index 92021eb..83782c3 100644
--- a/examples/parallelwave_gan/baker/preprocess.py
+++ b/examples/parallelwave_gan/baker/preprocess.py
@@ -13,7 +13,9 @@
 # limitations under the License.
 
 from operator import itemgetter
-from typing import List, Dict, Any
+from typing import Any
+from typing import Dict
+from typing import List
 
 import argparse
 import jsonlines
@@ -39,8 +41,8 @@ def process_sentence(config: Dict[str, Any],
     # reading
     y, sr = librosa.load(str(fp), sr=config.sr)  # resampling may occur
     assert len(y.shape) == 1, f"{utt_id} is not a mono-channel audio."
-    assert np.abs(y).max(
-    ) <= 1.0, f"{utt_id} is seems to be different that 16 bit PCM."
+    assert np.abs(
+        y).max() <= 1.0, f"{utt_id} is seems to be different that 16 bit PCM."
     duration = librosa.get_duration(y, sr=sr)
 
     # trim according to the alignment file
@@ -80,8 +82,8 @@ def process_sentence(config: Dict[str, Any],
     # adjust time to make num_samples == num_frames * hop_length
     num_frames = logmel.shape[0]
     if y.size < num_frames * config.hop_length:
-        y = np.pad(y, (0, num_frames * config.hop_length - y.size),
-                   mode="reflect")
+        y = np.pad(
+            y, (0, num_frames * config.hop_length - y.size), mode="reflect")
     else:
         y = y[:num_frames * config.hop_length]
     num_sample = y.shape[0]
@@ -139,10 +141,7 @@ def main():
     parser = argparse.ArgumentParser(
         description="Preprocess audio and then extract features .")
     parser.add_argument(
-        "--rootdir",
-        default=None,
-        type=str,
-        help="directory to baker dataset.")
+        "--rootdir", default=None, type=str, help="directory to baker dataset.")
     parser.add_argument(
         "--dumpdir",
         type=str,
diff --git a/examples/parallelwave_gan/baker/pwg_updater.py b/examples/parallelwave_gan/baker/pwg_updater.py
index 90cf655..6b47584 100644
--- a/examples/parallelwave_gan/baker/pwg_updater.py
+++ b/examples/parallelwave_gan/baker/pwg_updater.py
@@ -20,17 +20,11 @@ from paddle.nn import Layer
 from paddle.optimizer import Optimizer
 from paddle.optimizer.lr import LRScheduler
 from paddle.io import DataLoader
-from paddle.io import DistributedBatchSampler
 from timer import timer
 
-from parakeet.datasets.data_table import DataTable
 from parakeet.training.updaters.standard_updater import StandardUpdater, UpdaterState
 from parakeet.training.extensions.evaluator import StandardEvaluator
-from parakeet.training.trainer import Trainer
 from parakeet.training.reporter import report
-from parakeet.models.parallel_wavegan import PWGGenerator, PWGDiscriminator
-from parakeet.modules.stft_loss import MultiResolutionSTFTLoss
-from parakeet.utils.profile import synchronize
 
 
 class PWGUpdater(StandardUpdater):
diff --git a/examples/parallelwave_gan/baker/synthesize.py b/examples/parallelwave_gan/baker/synthesize.py
index 1216220..8a78ad5 100644
--- a/examples/parallelwave_gan/baker/synthesize.py
+++ b/examples/parallelwave_gan/baker/synthesize.py
@@ -12,20 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import os
-import sys
-from timer import timer
-import logging
 import argparse
+import os
 from pathlib import Path
+from timer import timer
 
-import yaml
 import jsonlines
-import paddle
 import numpy as np
+import paddle
 import soundfile as sf
+import yaml
 from paddle import distributed as dist
-
 from parakeet.datasets.data_table import DataTable
 from parakeet.models.parallel_wavegan import PWGGenerator
 
diff --git a/examples/parallelwave_gan/baker/train.py b/examples/parallelwave_gan/baker/train.py
index 1bf0a90..7082494 100644
--- a/examples/parallelwave_gan/baker/train.py
+++ b/examples/parallelwave_gan/baker/train.py
@@ -130,8 +130,7 @@ def train_sp(args, config):
         parameters=generator.parameters(),
         **config["generator_optimizer_params"])
     lr_schedule_d = StepDecay(**config["discriminator_scheduler_params"])
-    gradient_clip_d = nn.ClipGradByGlobalNorm(config[
-        "discriminator_grad_norm"])
+    gradient_clip_d = nn.ClipGradByGlobalNorm(config["discriminator_grad_norm"])
     optimizer_d = Adam(
         learning_rate=lr_schedule_d,
         grad_clip=gradient_clip_d,
@@ -184,8 +183,7 @@ def train_sp(args, config):
         stop_trigger=(config.train_max_steps, "iteration"),
         out=output_dir, )
 
-    trainer.extend(
-        evaluator, trigger=(config.eval_interval_steps, 'iteration'))
+    trainer.extend(evaluator, trigger=(config.eval_interval_steps, 'iteration'))
     if dist.get_rank() == 0:
         writer = LogWriter(str(trainer.out))
         trainer.extend(VisualDL(writer), trigger=(1, 'iteration'))
diff --git a/examples/speedyspeech/baker/batch_fn.py b/examples/speedyspeech/baker/batch_fn.py
index 6bc0df7..e9089ed 100644
--- a/examples/speedyspeech/baker/batch_fn.py
+++ b/examples/speedyspeech/baker/batch_fn.py
@@ -22,8 +22,7 @@ def collate_baker_examples(examples):
     tones = [np.array(item["tones"], dtype=np.int64) for item in examples]
     feats = [np.array(item["feats"], dtype=np.float32) for item in examples]
     durations = [
-        np.array(
-            item["durations"], dtype=np.int64) for item in examples
+        np.array(item["durations"], dtype=np.int64) for item in examples
     ]
     num_phones = np.array([item["num_phones"] for item in examples])
     num_frames = np.array([item["num_frames"] for item in examples])
diff --git a/examples/speedyspeech/baker/compute_statistics.py b/examples/speedyspeech/baker/compute_statistics.py
index 3d3dd5b..ab8767c 100644
--- a/examples/speedyspeech/baker/compute_statistics.py
+++ b/examples/speedyspeech/baker/compute_statistics.py
@@ -15,21 +15,14 @@
 
 import argparse
 import logging
-import os
 from pathlib import Path
 
-import numpy as np
-import yaml
-import json
 import jsonlines
-
+import numpy as np
+from parakeet.datasets.data_table import DataTable
 from sklearn.preprocessing import StandardScaler
 from tqdm import tqdm
 
-from parakeet.datasets.data_table import DataTable
-from parakeet.utils.h5_utils import read_hdf5
-from parakeet.utils.h5_utils import write_hdf5
-
 from config import get_cfg_default
 
 
diff --git a/examples/speedyspeech/baker/frontend.py b/examples/speedyspeech/baker/frontend.py
index e8869dd..85029ad 100644
--- a/examples/speedyspeech/baker/frontend.py
+++ b/examples/speedyspeech/baker/frontend.py
@@ -17,7 +17,6 @@ from pathlib import Path
 
 import numpy as np
 import paddle
-import pypinyin
 from pypinyin import lazy_pinyin, Style
 import jieba
 import phkit
diff --git a/examples/speedyspeech/baker/inference.py b/examples/speedyspeech/baker/inference.py
index 3bd4384..2be9322 100644
--- a/examples/speedyspeech/baker/inference.py
+++ b/examples/speedyspeech/baker/inference.py
@@ -15,9 +15,8 @@
 import argparse
 from pathlib import Path
 
-import numpy as np
-from paddle import inference
 import soundfile as sf
+from paddle import inference
 
 from frontend import text_analysis
 
@@ -73,8 +72,8 @@ def main():
 
         speedyspeech_predictor.run()
         output_names = speedyspeech_predictor.get_output_names()
-        output_handle = speedyspeech_predictor.get_output_handle(output_names[
-            0])
+        output_handle = speedyspeech_predictor.get_output_handle(
+            output_names[0])
         output_data = output_handle.copy_to_cpu()
 
         input_names = pwg_predictor.get_input_names()
diff --git a/examples/speedyspeech/baker/normalize.py b/examples/speedyspeech/baker/normalize.py
index 2d1b028..f453898 100644
--- a/examples/speedyspeech/baker/normalize.py
+++ b/examples/speedyspeech/baker/normalize.py
@@ -15,19 +15,16 @@
 
 import argparse
 import logging
-import os
-from copy import copy
 from operator import itemgetter
 from pathlib import Path
 
-import numpy as np
-import yaml
 import jsonlines
+import numpy as np
 from sklearn.preprocessing import StandardScaler
 from tqdm import tqdm
 
-from parakeet.frontend.vocab import Vocab
 from parakeet.datasets.data_table import DataTable
+from parakeet.frontend.vocab import Vocab
 
 from config import get_cfg_default
 
@@ -100,7 +97,10 @@ def main():
     for item in metadata:
         item["feats"] = str(metadata_dir / item["feats"])
 
-    dataset = DataTable(metadata, converters={'feats': np.load, })
+    dataset = DataTable(
+        metadata, converters={
+            'feats': np.load,
+        })
     logging.info(f"The number of files = {len(dataset)}.")
 
     # restore scaler
diff --git a/examples/speedyspeech/baker/preprocess.py b/examples/speedyspeech/baker/preprocess.py
index 2c720b7..1ec0ed9 100644
--- a/examples/speedyspeech/baker/preprocess.py
+++ b/examples/speedyspeech/baker/preprocess.py
@@ -13,7 +13,9 @@
 # limitations under the License.
 
 from operator import itemgetter
-from typing import List, Dict, Any
+from typing import Any
+from typing import Dict
+from typing import List
 
 import argparse
 import jsonlines
@@ -41,8 +43,8 @@ def process_sentence(config: Dict[str, Any],
     # reading
     y, sr = librosa.load(str(fp), sr=config.sr)  # resampling may occur
     assert len(y.shape) == 1, f"{utt_id} is not a mono-channel audio."
-    assert np.abs(y).max(
-    ) <= 1.0, f"{utt_id} is seems to be different that 16 bit PCM."
+    assert np.abs(
+        y).max() <= 1.0, f"{utt_id} is seems to be different that 16 bit PCM."
     duration = librosa.get_duration(y, sr=sr)
 
     # intervals with empty lables are ignored
@@ -162,10 +164,7 @@ def main():
     parser = argparse.ArgumentParser(
         description="Preprocess audio and then extract features.")
     parser.add_argument(
-        "--rootdir",
-        default=None,
-        type=str,
-        help="directory to baker dataset.")
+        "--rootdir", default=None, type=str, help="directory to baker dataset.")
     parser.add_argument(
         "--dumpdir",
         type=str,
diff --git a/examples/speedyspeech/baker/speedyspeech_updater.py b/examples/speedyspeech/baker/speedyspeech_updater.py
index bbb65d7..daa0f57 100644
--- a/examples/speedyspeech/baker/speedyspeech_updater.py
+++ b/examples/speedyspeech/baker/speedyspeech_updater.py
@@ -13,15 +13,13 @@
 # limitations under the License.
 
 import paddle
-from paddle.nn import functional as F
 from paddle.fluid.layers import huber_loss
-
-from parakeet.modules.ssim import ssim
+from paddle.nn import functional as F
 from parakeet.modules.losses import masked_l1_loss, weighted_mean
+from parakeet.modules.ssim import ssim
+from parakeet.training.extensions.evaluator import StandardEvaluator
 from parakeet.training.reporter import report
 from parakeet.training.updaters.standard_updater import StandardUpdater
-from parakeet.training.extensions.evaluator import StandardEvaluator
-from parakeet.models.speedyspeech import SpeedySpeech
 
 
 class SpeedySpeechUpdater(StandardUpdater):
diff --git a/examples/speedyspeech/baker/synthesize.py b/examples/speedyspeech/baker/synthesize.py
index 0cddf73..65fccb4 100644
--- a/examples/speedyspeech/baker/synthesize.py
+++ b/examples/speedyspeech/baker/synthesize.py
@@ -11,30 +11,25 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import os
-import sys
 import logging
 import argparse
-import dataclasses
 from pathlib import Path
 
-import yaml
 import jsonlines
-import paddle
 import numpy as np
 import soundfile as sf
 import paddle
-from paddle import nn
-from paddle.nn import functional as F
-from paddle import distributed as dist
+import yaml
 from paddle import jit
 from paddle.static import InputSpec
 from yacs.config import CfgNode
 
 from parakeet.datasets.data_table import DataTable
-from parakeet.models.speedyspeech import SpeedySpeech, SpeedySpeechInference
-from parakeet.models.parallel_wavegan import PWGGenerator, PWGInference
+from parakeet.models.speedyspeech import SpeedySpeech
+from parakeet.models.speedyspeech import SpeedySpeechInference
+from parakeet.models.parallel_wavegan import PWGGenerator
+from parakeet.models.parallel_wavegan import PWGInference
 from parakeet.modules.normalizer import ZScore
 
 
@@ -79,9 +74,8 @@ def evaluate(args, speedyspeech_config, pwg_config):
     speedyspeech_inference = jit.to_static(
         speedyspeech_inference,
         input_spec=[
-            InputSpec(
-                [-1], dtype=paddle.int64), InputSpec(
-                    [-1], dtype=paddle.int64)
+            InputSpec([-1], dtype=paddle.int64), InputSpec(
+                [-1], dtype=paddle.int64)
         ])
     paddle.jit.save(speedyspeech_inference,
                     os.path.join(args.inference_dir, "speedyspeech"))
@@ -91,9 +85,9 @@ def evaluate(args, speedyspeech_config, pwg_config):
     pwg_inference = PWGInference(pwg_normalizer, vocoder)
     pwg_inference.eval()
     pwg_inference = jit.to_static(
-        pwg_inference,
-        input_spec=[InputSpec(
-            [-1, 80], dtype=paddle.float32), ])
+        pwg_inference, input_spec=[
+            InputSpec([-1, 80], dtype=paddle.float32),
+        ])
     paddle.jit.save(pwg_inference, os.path.join(args.inference_dir, "pwg"))
     pwg_inference = paddle.jit.load(os.path.join(args.inference_dir, "pwg"))
 
@@ -119,9 +113,7 @@ def main():
     parser = argparse.ArgumentParser(
         description="Synthesize with speedyspeech & parallel wavegan.")
     parser.add_argument(
-        "--speedyspeech-config",
-        type=str,
-        help="config file for speedyspeech.")
+        "--speedyspeech-config", type=str, help="config file for speedyspeech.")
     parser.add_argument(
         "--speedyspeech-checkpoint",
         type=str,
diff --git a/examples/speedyspeech/baker/synthesize.sh b/examples/speedyspeech/baker/synthesize.sh
index 18f056d..2bca37c 100644
--- a/examples/speedyspeech/baker/synthesize.sh
+++ b/examples/speedyspeech/baker/synthesize.sh
@@ -1,6 +1,6 @@
 python synthesize.py \
   --speedyspeech-config=conf/default.yaml \
-  --speedyspeech-checkpoint=exp/debug/checkpoints/snapshot_iter_91800.pdz \
+  --speedyspeech-checkpoint=exp/default/checkpoints/snapshot_iter_91800.pdz \
   --speedyspeech-stat=dump/train/stats.npy \
   --pwg-config=../../parallelwave_gan/baker/conf/default.yaml \
   --pwg-params=../../parallelwave_gan/baker/converted.pdparams \
diff --git a/examples/speedyspeech/baker/synthesize_e2e.py b/examples/speedyspeech/baker/synthesize_e2e.py
index 8bf911b..f9ec33e 100644
--- a/examples/speedyspeech/baker/synthesize_e2e.py
+++ b/examples/speedyspeech/baker/synthesize_e2e.py
@@ -13,28 +13,22 @@
 # limitations under the License.
 
 import os
-import sys
 import logging
 import argparse
-import dataclasses
 from pathlib import Path
 
-import yaml
-import jsonlines
-import paddle
 import numpy as np
 import soundfile as sf
 import paddle
+import yaml
 from paddle import jit
 from paddle.static import InputSpec
-from paddle import nn
-from paddle.nn import functional as F
-from paddle import distributed as dist
 from yacs.config import CfgNode
 
-from parakeet.datasets.data_table import DataTable
-from parakeet.models.speedyspeech import SpeedySpeech, SpeedySpeechInference
-from parakeet.models.parallel_wavegan import PWGGenerator, PWGInference
+from parakeet.models.speedyspeech import SpeedySpeech
+from parakeet.models.speedyspeech import SpeedySpeechInference
+from parakeet.models.parallel_wavegan import PWGGenerator
+from parakeet.models.parallel_wavegan import PWGInference
 from parakeet.modules.normalizer import ZScore
 
 from frontend import text_analysis
@@ -57,8 +51,7 @@ def evaluate(args, speedyspeech_config, pwg_config):
     model.eval()
 
     vocoder = PWGGenerator(**pwg_config["generator_params"])
-    vocoder.set_state_dict(
-        paddle.load(args.pwg_checkpoint)["generator_params"])
+    vocoder.set_state_dict(paddle.load(args.pwg_checkpoint)["generator_params"])
     vocoder.remove_weight_norm()
     vocoder.eval()
     print("model done!")
@@ -81,9 +74,8 @@ def evaluate(args, speedyspeech_config, pwg_config):
     speedyspeech_inference = jit.to_static(
         speedyspeech_inference,
         input_spec=[
-            InputSpec(
-                [-1], dtype=paddle.int64), InputSpec(
-                    [-1], dtype=paddle.int64)
+            InputSpec([-1], dtype=paddle.int64), InputSpec(
+                [-1], dtype=paddle.int64)
         ])
     paddle.jit.save(speedyspeech_inference,
                     os.path.join(args.inference_dir, "speedyspeech"))
@@ -93,9 +85,9 @@ def evaluate(args, speedyspeech_config, pwg_config):
     pwg_inference = PWGInference(pwg_normalizer, vocoder)
     pwg_inference.eval()
     pwg_inference = jit.to_static(
-        pwg_inference,
-        input_spec=[InputSpec(
-            [-1, 80], dtype=paddle.float32), ])
+        pwg_inference, input_spec=[
+            InputSpec([-1, 80], dtype=paddle.float32),
+        ])
     paddle.jit.save(pwg_inference, os.path.join(args.inference_dir, "pwg"))
     pwg_inference = paddle.jit.load(os.path.join(args.inference_dir, "pwg"))
 
@@ -119,9 +111,7 @@ def main():
     parser = argparse.ArgumentParser(
         description="Synthesize with speedyspeech & parallel wavegan.")
     parser.add_argument(
-        "--speedyspeech-config",
-        type=str,
-        help="config file for speedyspeech.")
+        "--speedyspeech-config", type=str, help="config file for speedyspeech.")
     parser.add_argument(
         "--speedyspeech-checkpoint",
         type=str,
diff --git a/examples/speedyspeech/baker/tg_utils.py b/examples/speedyspeech/baker/tg_utils.py
index 18c0385..e6ea593 100644
--- a/examples/speedyspeech/baker/tg_utils.py
+++ b/examples/speedyspeech/baker/tg_utils.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 import librosa
-from praatio import tgio
 
 
 def validate_textgrid(text_grid, num_samples, sr):
diff --git a/examples/speedyspeech/baker/train.py b/examples/speedyspeech/baker/train.py
index c4b5b20..6f063a6 100644
--- a/examples/speedyspeech/baker/train.py
+++ b/examples/speedyspeech/baker/train.py
@@ -72,7 +72,9 @@ def train_sp(args, config):
         fields=[
             "phones", "tones", "num_phones", "num_frames", "feats", "durations"
         ],
-        converters={"feats": np.load, }, )
+        converters={
+            "feats": np.load,
+        }, )
     with jsonlines.open(args.dev_metadata, 'r') as reader:
         dev_metadata = list(reader)
     metadata_dir = Path(args.dev_metadata).parent
@@ -83,7 +85,9 @@ def train_sp(args, config):
         fields=[
             "phones", "tones", "num_phones", "num_frames", "feats", "durations"
         ],
-        converters={"feats": np.load, }, )
+        converters={
+            "feats": np.load,
+        }, )
 
     # collate function and dataloader
     train_sampler = DistributedBatchSampler(
diff --git a/examples/tacotron2/ljspeech.py b/examples/tacotron2/ljspeech.py
index a5054d4..76e4b3a 100644
--- a/examples/tacotron2/ljspeech.py
+++ b/examples/tacotron2/ljspeech.py
@@ -46,8 +46,7 @@ class LJSpeech(Dataset):
 class LJSpeechCollector(object):
     """A simple callable to batch LJSpeech examples."""
 
-    def __init__(self, padding_idx=0, padding_value=0.,
-                 padding_stop_token=1.0):
+    def __init__(self, padding_idx=0, padding_value=0., padding_stop_token=1.0):
         self.padding_idx = padding_idx
         self.padding_value = padding_value
         self.padding_stop_token = padding_stop_token
diff --git a/examples/tacotron2/preprocess.py b/examples/tacotron2/preprocess.py
index f93aa46..aa7bf24 100644
--- a/examples/tacotron2/preprocess.py
+++ b/examples/tacotron2/preprocess.py
@@ -63,8 +63,7 @@ def create_dataset(config, source_path, target_path, verbose=False):
     with open(target_path / "metadata.pkl", 'wb') as f:
         pickle.dump(records, f)
         if verbose:
-            print("saved metadata into {}".format(target_path /
-                                                  "metadata.pkl"))
+            print("saved metadata into {}".format(target_path / "metadata.pkl"))
 
     print("Done.")
 
diff --git a/examples/tacotron2/train.py b/examples/tacotron2/train.py
index 20fdd40..82dd4c3 100644
--- a/examples/tacotron2/train.py
+++ b/examples/tacotron2/train.py
@@ -14,14 +14,13 @@
 
 import time
 from collections import defaultdict
+
 import numpy as np
-
 import paddle
+from paddle.io import DataLoader
+from paddle.io import DistributedBatchSampler
 from paddle import distributed as dist
-from paddle.io import DataLoader, DistributedBatchSampler
-
 from parakeet.data import dataset
-from parakeet.frontend import EnglishCharacter  # pylint: disable=unused-import
 from parakeet.training.cli import default_argument_parser
 from parakeet.training.experiment import ExperimentBase
 from parakeet.utils import display, mp_tools
@@ -74,8 +73,7 @@ class Experiment(ExperimentBase):
 
         if dist.get_rank() == 0:
             for k, v in losses_np.items():
-                self.visualizer.add_scalar(f"train_loss/{k}", v,
-                                           self.iteration)
+                self.visualizer.add_scalar(f"train_loss/{k}", v, self.iteration)
 
     @mp_tools.rank_zero_only
     @paddle.no_grad()
diff --git a/examples/tacotron2_aishell3/aishell3.py b/examples/tacotron2_aishell3/aishell3.py
index 66b4680..c53cf59 100644
--- a/examples/tacotron2_aishell3/aishell3.py
+++ b/examples/tacotron2_aishell3/aishell3.py
@@ -65,8 +65,8 @@ def collate_aishell3_examples(examples):
     text_lengths = np.array([item.shape[0] for item in phones], dtype=np.int64)
     spec_lengths = np.array([item.shape[1] for item in mel], dtype=np.int64)
     T_dec = np.max(spec_lengths)
-    stop_tokens = (np.arange(T_dec) >= np.expand_dims(spec_lengths, -1)
-                   ).astype(np.float32)
+    stop_tokens = (
+        np.arange(T_dec) >= np.expand_dims(spec_lengths, -1)).astype(np.float32)
     phones, _ = batch_text_id(phones)
     tones, _ = batch_text_id(tones)
     mel, _ = batch_spec(mel)
diff --git a/examples/tacotron2_aishell3/preprocess_transcription.py b/examples/tacotron2_aishell3/preprocess_transcription.py
index 2d4aa85..fa74331 100644
--- a/examples/tacotron2_aishell3/preprocess_transcription.py
+++ b/examples/tacotron2_aishell3/preprocess_transcription.py
@@ -121,8 +121,8 @@ def convert(syllable):
     syllable = syllable.replace("ing", "ieng").replace("in", "ien")
 
     # expansion for un, ui, iu
-    syllable = syllable.replace("un", "uen").replace(
-        "ui", "uei").replace("iu", "iou")
+    syllable = syllable.replace("un", "uen").replace("ui",
+                                                     "uei").replace("iu", "iou")
 
     # rule for variants of i
     syllable = syllable.replace("zi", "zii").replace("ci", "cii").replace("si", "sii")\
diff --git a/examples/tacotron2_aishell3/process_wav.py b/examples/tacotron2_aishell3/process_wav.py
index e3a1c73..34d4089 100644
--- a/examples/tacotron2_aishell3/process_wav.py
+++ b/examples/tacotron2_aishell3/process_wav.py
@@ -68,8 +68,7 @@ def preprocess_aishell3(source_dir, target_dir, alignment_dir):
         alignment_dir=alignment_dir)
     with Pool(16) as p:
         list(
-            tqdm(
-                p.imap(fx, wav_paths), total=len(wav_paths), unit="utterance"))
+            tqdm(p.imap(fx, wav_paths), total=len(wav_paths), unit="utterance"))
 
 
 if __name__ == "__main__":
diff --git a/examples/tacotron2_aishell3/train.py b/examples/tacotron2_aishell3/train.py
index 64b5166..de01811 100644
--- a/examples/tacotron2_aishell3/train.py
+++ b/examples/tacotron2_aishell3/train.py
@@ -109,8 +109,7 @@ class Experiment(ExperimentBase):
             mel_pred = outputs['mel_outputs_postnet']
             self.visualizer.add_figure(
                 f"valid_sentence_{i}_predicted_spectrogram",
-                display.plot_spectrogram(mel_pred[0].numpy().T),
-                self.iteration)
+                display.plot_spectrogram(mel_pred[0].numpy().T), self.iteration)
 
         # write visual log
         valid_losses = {k: np.mean(v) for k, v in valid_losses.items()}
diff --git a/examples/text_frontend/get_textnorm_data.py b/examples/text_frontend/get_textnorm_data.py
index 11c5c0f..8058e05 100644
--- a/examples/text_frontend/get_textnorm_data.py
+++ b/examples/text_frontend/get_textnorm_data.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 import argparse
-import re
 from pathlib import Path
 
 
diff --git a/examples/text_frontend/test_g2p.py b/examples/text_frontend/test_g2p.py
index 90f7bf2..ba456e9 100644
--- a/examples/text_frontend/test_g2p.py
+++ b/examples/text_frontend/test_g2p.py
@@ -40,6 +40,7 @@ def get_avg_wer(raw_dict, ref_dict, frontend, output_dir):
         raw_text = raw_dict[utt_id]
         text = text_cleaner(raw_text)
         g2p_phones = frontend.get_phonemes(text)
+        g2p_phones = sum(g2p_phones, [])
         gt_phones = ref_dict[utt_id].split(" ")
         # delete silence tokens in predicted phones and ground truth phones
         g2p_phones = [phn for phn in g2p_phones if phn not in SILENCE_TOKENS]
diff --git a/examples/transformer_tts/ljspeech.py b/examples/transformer_tts/ljspeech.py
index f8fcc67..6397f3c 100644
--- a/examples/transformer_tts/ljspeech.py
+++ b/examples/transformer_tts/ljspeech.py
@@ -53,10 +53,10 @@ class Transform(object):
         ids, mel = example  # ids already have <s> and </s>
         ids = np.array(ids, dtype=np.int64)
         # add start and end frame
-        mel = np.pad(mel, [(0, 0), (1, 1)],
-                     mode='constant',
-                     constant_values=[(0, 0),
-                                      (self.start_value, self.end_value)])
+        mel = np.pad(
+            mel, [(0, 0), (1, 1)],
+            mode='constant',
+            constant_values=[(0, 0), (self.start_value, self.end_value)])
         stop_labels = np.ones([mel.shape[1]], dtype=np.int64)
         stop_labels[-1] = 2
         # actually this thing can also be done within the model
diff --git a/examples/transformer_tts/preprocess.py b/examples/transformer_tts/preprocess.py
index 23fbc7f..670227e 100644
--- a/examples/transformer_tts/preprocess.py
+++ b/examples/transformer_tts/preprocess.py
@@ -64,8 +64,7 @@ def create_dataset(config, source_path, target_path, verbose=False):
     with open(target_path / "metadata.pkl", 'wb') as f:
         pickle.dump(records, f)
         if verbose:
-            print("saved metadata into {}".format(target_path /
-                                                  "metadata.pkl"))
+            print("saved metadata into {}".format(target_path / "metadata.pkl"))
 
     # also save meta data into text format for inspection
     with open(target_path / "metadata.txt", 'wt') as f:
@@ -73,8 +72,7 @@ def create_dataset(config, source_path, target_path, verbose=False):
             phoneme_str = "|".join(phonemes)
             f.write("{}\t{}\t{}\n".format(mel_name, text, phoneme_str))
         if verbose:
-            print("saved metadata into {}".format(target_path /
-                                                  "metadata.txt"))
+            print("saved metadata into {}".format(target_path / "metadata.txt"))
 
     print("Done.")
 
diff --git a/examples/transformer_tts/synthesize.py b/examples/transformer_tts/synthesize.py
index 14bdfcb..6b49f3a 100644
--- a/examples/transformer_tts/synthesize.py
+++ b/examples/transformer_tts/synthesize.py
@@ -60,7 +60,7 @@ def main(config, args):
         display.plot_multilayer_multihead_alignments(attns)
         plt.savefig(str(output_dir / f"sentence_{i}.png"))
 
-        mel_output = mel_output.T  #(C, T)
+        mel_output = mel_output.T  # (C, T)
         np.save(str(output_dir / f"sentence_{i}"), mel_output)
         if args.verbose:
             print("spectrogram saved at {}".format(output_dir /
diff --git a/examples/transformer_tts/train.py b/examples/transformer_tts/train.py
index ff0c40f..e2da676 100644
--- a/examples/transformer_tts/train.py
+++ b/examples/transformer_tts/train.py
@@ -76,8 +76,7 @@ class TransformerTTSExperiment(ExperimentBase):
         ljspeech_dataset = LJSpeech(args.data)
         transform = Transform(config.data.mel_start_value,
                               config.data.mel_end_value)
-        ljspeech_dataset = dataset.TransformDataset(ljspeech_dataset,
-                                                    transform)
+        ljspeech_dataset = dataset.TransformDataset(ljspeech_dataset, transform)
         valid_set, train_set = dataset.split(ljspeech_dataset,
                                              config.data.valid_size)
         batch_fn = LJSpeechCollector(padding_idx=config.data.padding_idx)
@@ -159,8 +158,7 @@ class TransformerTTSExperiment(ExperimentBase):
 
         if dist.get_rank() == 0:
             for k, v in losses_np.items():
-                self.visualizer.add_scalar(f"train_loss/{k}", v,
-                                           self.iteration)
+                self.visualizer.add_scalar(f"train_loss/{k}", v, self.iteration)
 
     @mp_tools.rank_zero_only
     @paddle.no_grad()
diff --git a/examples/use_mfa/local/generate_lexicon.py b/examples/use_mfa/local/generate_lexicon.py
index 1791e7b..b6e594a 100644
--- a/examples/use_mfa/local/generate_lexicon.py
+++ b/examples/use_mfa/local/generate_lexicon.py
@@ -90,8 +90,8 @@ def rule(C, V, R, T):
         return None
 
     # ua, uai, uang 不能和 d, t, n, l, r, z, c, s 拼
-    if V in ['ua', 'uai', 'uang'
-             ] and C in ['d', 't', 'n', 'l', 'r', 'z', 'c', 's']:
+    if V in ['ua', 'uai',
+             'uang'] and C in ['d', 't', 'n', 'l', 'r', 'z', 'c', 's']:
         return None
 
     # sh 和 ong 不能拼
diff --git a/examples/waveflow/preprocess.py b/examples/waveflow/preprocess.py
index 1d2ca25..199081c 100644
--- a/examples/waveflow/preprocess.py
+++ b/examples/waveflow/preprocess.py
@@ -28,8 +28,8 @@ from config import get_cfg_defaults
 
 
 class Transform(object):
-    def __init__(self, sample_rate, n_fft, win_length, hop_length, n_mels,
-                 fmin, fmax):
+    def __init__(self, sample_rate, n_fft, win_length, hop_length, n_mels, fmin,
+                 fmax):
         self.sample_rate = sample_rate
         self.n_fft = n_fft
         self.win_length = win_length
@@ -79,11 +79,8 @@ class Transform(object):
         spectrogram_magnitude = np.abs(spectrogram)
 
         # Compute mel-spectrograms.
-        mel_filter_bank = librosa.filters.mel(sr=sr,
-                                              n_fft=n_fft,
-                                              n_mels=n_mels,
-                                              fmin=fmin,
-                                              fmax=fmax)
+        mel_filter_bank = librosa.filters.mel(
+            sr=sr, n_fft=n_fft, n_mels=n_mels, fmin=fmin, fmax=fmax)
         mel_spectrogram = np.dot(mel_filter_bank, spectrogram_magnitude)
 
         # log scale mel_spectrogram.
diff --git a/examples/waveflow/synthesize.py b/examples/waveflow/synthesize.py
index bb7c0ff..e25cec3 100644
--- a/examples/waveflow/synthesize.py
+++ b/examples/waveflow/synthesize.py
@@ -39,8 +39,7 @@ def main(config, args):
         mel = np.load(str(file_path))
         with paddle.amp.auto_cast():
             audio = model.predict(mel)
-        audio_path = output_dir / (
-            os.path.splitext(file_path.name)[0] + ".wav")
+        audio_path = output_dir / (os.path.splitext(file_path.name)[0] + ".wav")
         sf.write(audio_path, audio, config.data.sample_rate)
         print("[synthesize] {} -> {}".format(file_path, audio_path))
 
diff --git a/examples/waveflow/train.py b/examples/waveflow/train.py
index feddf20..359670f 100644
--- a/examples/waveflow/train.py
+++ b/examples/waveflow/train.py
@@ -114,8 +114,7 @@ class Experiment(ExperimentBase):
         msg += "loss: {:>.6f}".format(loss_value)
         self.logger.info(msg)
         if dist.get_rank() == 0:
-            self.visualizer.add_scalar("train/loss", loss_value,
-                                       self.iteration)
+            self.visualizer.add_scalar("train/loss", loss_value, self.iteration)
 
     @mp_tools.rank_zero_only
     @paddle.no_grad()
diff --git a/parakeet/__init__.py b/parakeet/__init__.py
index f08f907..67be25b 100644
--- a/parakeet/__init__.py
+++ b/parakeet/__init__.py
@@ -13,6 +13,3 @@
 # limitations under the License.
 
 __version__ = "0.0.0"
-
-import logging
-from parakeet import audio, data, datasets, frontend, models, modules, training, utils
diff --git a/parakeet/audio/__init__.py b/parakeet/audio/__init__.py
index 7fc437c..abf198b 100644
--- a/parakeet/audio/__init__.py
+++ b/parakeet/audio/__init__.py
@@ -11,6 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from .audio import AudioProcessor
-from .spec_normalizer import NormalizerBase, LogMagnitude
\ No newline at end of file
diff --git a/parakeet/audio/audio.py b/parakeet/audio/audio.py
index c0d4c97..ab9a45d 100644
--- a/parakeet/audio/audio.py
+++ b/parakeet/audio/audio.py
@@ -11,10 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import librosa
-import soundfile as sf
 import numpy as np
+import soundfile as sf
 
 __all__ = ["AudioProcessor"]
 
@@ -53,11 +52,12 @@ class AudioProcessor(object):
         self.inv_mel_filter = np.linalg.pinv(self.mel_filter)
 
     def _create_mel_filter(self):
-        mel_filter = librosa.filters.mel(self.sample_rate,
-                                         self.n_fft,
-                                         n_mels=self.n_mels,
-                                         fmin=self.fmin,
-                                         fmax=self.fmax)
+        mel_filter = librosa.filters.mel(
+            self.sample_rate,
+            self.n_fft,
+            n_mels=self.n_mels,
+            fmin=self.fmin,
+            fmax=self.fmax)
         return mel_filter
 
     def read_wav(self, filename):
diff --git a/parakeet/data/__init__.py b/parakeet/data/__init__.py
index 23476bc..2fed920 100644
--- a/parakeet/data/__init__.py
+++ b/parakeet/data/__init__.py
@@ -13,20 +13,3 @@
 # limitations under the License.
 """Parakeet's infrastructure for data processing.
 """
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from parakeet.data.batch import *
-from parakeet.data.dataset import *
-from parakeet.data.get_feats import *
diff --git a/parakeet/data/batch.py b/parakeet/data/batch.py
index d5f5e91..515074d 100644
--- a/parakeet/data/batch.py
+++ b/parakeet/data/batch.py
@@ -61,9 +61,10 @@ def batch_text_id(minibatch, pad_id=0, dtype=np.int64):
     for example in minibatch:
         pad_len = max_len - example.shape[0]
         batch.append(
-            np.pad(example, [(0, pad_len)],
-                   mode='constant',
-                   constant_values=pad_id))
+            np.pad(
+                example, [(0, pad_len)],
+                mode='constant',
+                constant_values=pad_id))
 
     return np.array(batch, dtype=dtype), np.array(lengths, dtype=np.int64)
 
@@ -103,9 +104,10 @@ def batch_wav(minibatch, pad_value=0., dtype=np.float32):
     for example in minibatch:
         pad_len = max_len - example.shape[-1]
         batch.append(
-            np.pad(example, [(0, pad_len)],
-                   mode='constant',
-                   constant_values=pad_value))
+            np.pad(
+                example, [(0, pad_len)],
+                mode='constant',
+                constant_values=pad_value))
     return np.array(batch, dtype=dtype), np.array(lengths, dtype=np.int64)
 
 
@@ -152,14 +154,16 @@ def batch_spec(minibatch, pad_value=0., time_major=False, dtype=np.float32):
         pad_len = max_len - example.shape[time_idx]
         if time_major:
             batch.append(
-                np.pad(example, [(0, pad_len), (0, 0)],
-                       mode='constant',
-                       constant_values=pad_value))
+                np.pad(
+                    example, [(0, pad_len), (0, 0)],
+                    mode='constant',
+                    constant_values=pad_value))
         else:
             batch.append(
-                np.pad(example, [(0, 0), (0, pad_len)],
-                       mode='constant',
-                       constant_values=pad_value))
+                np.pad(
+                    example, [(0, 0), (0, pad_len)],
+                    mode='constant',
+                    constant_values=pad_value))
     return np.array(batch, dtype=dtype), np.array(lengths, dtype=np.int64)
 
 
@@ -178,10 +182,8 @@ def batch_sequences(sequences, axis=0, pad_value=0):
     for seq, length in zip(sequences, seq_lengths):
         padding = [(0, 0)] * axis + [(0, max_length - length)] + [(0, 0)] * (
             ndim - axis - 1)
-        padded_seq = np.pad(seq,
-                            padding,
-                            mode='constant',
-                            constant_values=pad_value)
+        padded_seq = np.pad(
+            seq, padding, mode='constant', constant_values=pad_value)
         padded_sequences.append(padded_seq)
     batch = np.stack(padded_sequences)
     return batch
diff --git a/parakeet/data/dataset.py b/parakeet/data/dataset.py
index a188767..2d6c03c 100644
--- a/parakeet/data/dataset.py
+++ b/parakeet/data/dataset.py
@@ -11,9 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import six
-import paddle
 from paddle.io import Dataset
 
 __all__ = [
@@ -69,7 +67,7 @@ class CacheDataset(Dataset):
         return len(self._dataset)
 
     def __getitem__(self, i):
-        if not i in self._cache:
+        if i not in self._cache:
             self._cache[i] = self._dataset[i]
         return self._cache[i]
 
@@ -86,9 +84,8 @@ class TupleDataset(Dataset):
         length = len(datasets[0])
         for i, dataset in enumerate(datasets):
             if len(dataset) != length:
-                raise ValueError(
-                    "all the datasets should have the same length."
-                    "dataset {} has a different length".format(i))
+                raise ValueError("all the datasets should have the same length."
+                                 "dataset {} has a different length".format(i))
         self._datasets = datasets
         self._length = length
 
@@ -115,7 +112,7 @@ class DictDataset(Dataset):
         A compound dataset made from several datasets of the same length. An 
         example of the `DictDataset` is a dict of examples from the constituent 
         datasets.
-        
+
         WARNING: paddle does not have a good support for DictDataset, because
         every batch yield from a DataLoader is a list, but it cannot be a dict.
         So you have to provide a collate function because you cannot use the
diff --git a/parakeet/data/get_feats.py b/parakeet/data/get_feats.py
index 4027e9b..0acfd7f 100644
--- a/parakeet/data/get_feats.py
+++ b/parakeet/data/get_feats.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import librosa
 import numpy as np
 import pyworld
@@ -46,11 +45,12 @@ class LogMelFBank():
         self.mel_filter = self._create_mel_filter()
 
     def _create_mel_filter(self):
-        mel_filter = librosa.filters.mel(sr=self.sr,
-                                         n_fft=self.n_fft,
-                                         n_mels=self.n_mels,
-                                         fmin=self.fmin,
-                                         fmax=self.fmax)
+        mel_filter = librosa.filters.mel(
+            sr=self.sr,
+            n_fft=self.n_fft,
+            n_mels=self.n_mels,
+            fmin=self.fmin,
+            fmax=self.fmax)
         return mel_filter
 
     def _stft(self, wav):
@@ -121,11 +121,12 @@ class Pitch():
                       use_log_f0=True) -> np.array:
         input = input.astype(np.float)
         frame_period = 1000 * self.hop_length / self.sr
-        f0, timeaxis = pyworld.dio(input,
-                                   fs=self.sr,
-                                   f0_floor=self.f0min,
-                                   f0_ceil=self.f0max,
-                                   frame_period=frame_period)
+        f0, timeaxis = pyworld.dio(
+            input,
+            fs=self.sr,
+            f0_floor=self.f0min,
+            f0_ceil=self.f0max,
+            frame_period=frame_period)
         f0 = pyworld.stonemask(input, f0, timeaxis, self.sr)
         if use_continuous_f0:
             f0 = self._convert_to_continuous_f0(f0)
@@ -195,8 +196,7 @@ class Energy():
         input_power = np.abs(input_stft)**2
         energy = np.sqrt(
             np.clip(
-                np.sum(input_power, axis=0), a_min=1.0e-10, a_max=float(
-                    'inf')))
+                np.sum(input_power, axis=0), a_min=1.0e-10, a_max=float('inf')))
         return energy
 
     def _average_by_duration(self, input: np.array, d: np.array) -> np.array:
diff --git a/parakeet/datasets/__init__.py b/parakeet/datasets/__init__.py
index e75da0b..abf198b 100644
--- a/parakeet/datasets/__init__.py
+++ b/parakeet/datasets/__init__.py
@@ -11,6 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from parakeet.datasets.common import *
-from parakeet.datasets.ljspeech import *
\ No newline at end of file
diff --git a/parakeet/datasets/common.py b/parakeet/datasets/common.py
index 61d0c93..d6fa3a8 100644
--- a/parakeet/datasets/common.py
+++ b/parakeet/datasets/common.py
@@ -11,14 +11,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from paddle.io import Dataset
-import os
-import librosa
 from pathlib import Path
-import numpy as np
 from typing import List
 
+import librosa
+import numpy as np
+from paddle.io import Dataset
+
 __all__ = ["AudioSegmentDataset", "AudioDataset", "AudioFolderDataset"]
 
 
@@ -57,7 +56,7 @@ class AudioSegmentDataset(Dataset):
 
 
 class AudioDataset(Dataset):
-    """A simple dataset adaptor for the audio files. 
+    """A simple dataset adaptor for the audio files.
     Read -> trim silence -> normalize
     """
 
diff --git a/parakeet/datasets/data_table.py b/parakeet/datasets/data_table.py
index 78a3608..b0e4c89 100644
--- a/parakeet/datasets/data_table.py
+++ b/parakeet/datasets/data_table.py
@@ -11,12 +11,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from typing import Union, Optional, Callable, Tuple, List, Dict, Any
-from pathlib import Path
 from multiprocessing import Manager
+from typing import Any
+from typing import Callable
+from typing import Dict
+from typing import List
 
-import numpy as np
 from paddle.io import Dataset
 
 
diff --git a/parakeet/datasets/ljspeech.py b/parakeet/datasets/ljspeech.py
index c34f52b..85cc3c1 100644
--- a/parakeet/datasets/ljspeech.py
+++ b/parakeet/datasets/ljspeech.py
@@ -11,9 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from pathlib import Path
 
 from paddle.io import Dataset
-from pathlib import Path
 
 __all__ = ["LJSpeechMetaData"]
 
diff --git a/parakeet/frontend/__init__.py b/parakeet/frontend/__init__.py
index b7b5874..abf198b 100644
--- a/parakeet/frontend/__init__.py
+++ b/parakeet/frontend/__init__.py
@@ -11,11 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from parakeet.frontend.vocab import *
-from parakeet.frontend.phonectic import *
-from parakeet.frontend.punctuation import *
-from parakeet.frontend.normalizer import *
-from parakeet.frontend.cn_normalization import *
-from parakeet.frontend.tone_sandhi import *
-from parakeet.frontend.generate_lexicon import *
diff --git a/parakeet/frontend/arpabet.py b/parakeet/frontend/arpabet.py
index e6f63b7..ae9212b 100644
--- a/parakeet/frontend/arpabet.py
+++ b/parakeet/frontend/arpabet.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 from parakeet.frontend.phonectic import Phonetics
 """
 A phonology system with ARPABET symbols and limited punctuations. The G2P 
@@ -200,8 +199,7 @@ class ARPABET(Phonetics):
             The list of pronunciation id sequence.
         """
         return self.numericalize(
-            self.phoneticize(
-                sentence, add_start_end=add_start_end))
+            self.phoneticize(sentence, add_start_end=add_start_end))
 
     @property
     def vocab_size(self):
@@ -217,9 +215,9 @@ class ARPABETWithStress(Phonetics):
         'AO1', 'AO2', 'AW0', 'AW1', 'AW2', 'AY0', 'AY1', 'AY2', 'B', 'CH', 'D',
         'DH', 'EH0', 'EH1', 'EH2', 'ER0', 'ER1', 'ER2', 'EY0', 'EY1', 'EY2',
         'F', 'G', 'HH', 'IH0', 'IH1', 'IH2', 'IY0', 'IY1', 'IY2', 'JH', 'K',
-        'L', 'M', 'N', 'NG', 'OW0', 'OW1', 'OW2', 'OY0', 'OY1', 'OY2', 'P',
-        'R', 'S', 'SH', 'T', 'TH', 'UH0', 'UH1', 'UH2', 'UW0', 'UW1', 'UW2',
-        'V', 'W', 'Y', 'Z', 'ZH'
+        'L', 'M', 'N', 'NG', 'OW0', 'OW1', 'OW2', 'OY0', 'OY1', 'OY2', 'P', 'R',
+        'S', 'SH', 'T', 'TH', 'UH0', 'UH1', 'UH2', 'UW0', 'UW1', 'UW2', 'V',
+        'W', 'Y', 'Z', 'ZH'
     ]
     punctuations = [',', '.', '?', '!']
     symbols = phonemes + punctuations
@@ -294,8 +292,7 @@ class ARPABETWithStress(Phonetics):
             The list of pronunciation id sequence.
         """
         return self.numericalize(
-            self.phoneticize(
-                sentence, add_start_end=add_start_end))
+            self.phoneticize(sentence, add_start_end=add_start_end))
 
     @property
     def vocab_size(self):
diff --git a/parakeet/frontend/cn_frontend.py b/parakeet/frontend/cn_frontend.py
index 62172f0..84903fc 100644
--- a/parakeet/frontend/cn_frontend.py
+++ b/parakeet/frontend/cn_frontend.py
@@ -11,17 +11,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import re
 
 import jieba.posseg as psg
-import numpy as np
-import paddle
-import re
 from g2pM import G2pM
-from parakeet.frontend.tone_sandhi import ToneSandhi
-from parakeet.frontend.cn_normalization.text_normlization import TextNormalizer
-from pypinyin import lazy_pinyin, Style
+from pypinyin import lazy_pinyin
+from pypinyin import Style
 
+from parakeet.frontend.cn_normalization.text_normlization import TextNormalizer
 from parakeet.frontend.generate_lexicon import generate_lexicon
+from parakeet.frontend.tone_sandhi import ToneSandhi
 
 
 class Frontend():
diff --git a/parakeet/frontend/cn_normalization/__init__.py b/parakeet/frontend/cn_normalization/__init__.py
index b1471d6..abf198b 100644
--- a/parakeet/frontend/cn_normalization/__init__.py
+++ b/parakeet/frontend/cn_normalization/__init__.py
@@ -11,5 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from parakeet.frontend.cn_normalization.text_normlization import *
\ No newline at end of file
diff --git a/parakeet/frontend/cn_normalization/chronology.py b/parakeet/frontend/cn_normalization/chronology.py
index 157d4ca..1d9520e 100644
--- a/parakeet/frontend/cn_normalization/chronology.py
+++ b/parakeet/frontend/cn_normalization/chronology.py
@@ -11,10 +11,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import re
 
-from .num import verbalize_cardinal, verbalize_digit, num2str, DIGITS
+from .num import DIGITS
+from .num import num2str
+from .num import verbalize_cardinal
+from .num import verbalize_digit
 
 
 def _time_num2str(num_string: str) -> str:
diff --git a/parakeet/frontend/cn_normalization/constants.py b/parakeet/frontend/cn_normalization/constants.py
index d1ae42b..5d2b0b3 100644
--- a/parakeet/frontend/cn_normalization/constants.py
+++ b/parakeet/frontend/cn_normalization/constants.py
@@ -11,9 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import re
 import string
+
 from pypinyin.constants import SUPPORT_UCS4
 
 # 全角半角转换
@@ -32,10 +32,7 @@ F2H_DIGITS = {chr(ord(char) + 65248): char for char in string.digits}
 H2F_DIGITS = {value: key for key, value in F2H_DIGITS.items()}
 
 # 标点符号全角 -> 半角映射表 (num: 32)
-F2H_PUNCTUATIONS = {
-    chr(ord(char) + 65248): char
-    for char in string.punctuation
-}
+F2H_PUNCTUATIONS = {chr(ord(char) + 65248): char for char in string.punctuation}
 # 标点符号半角 -> 全角映射表
 H2F_PUNCTUATIONS = {value: key for key, value in F2H_PUNCTUATIONS.items()}
 
diff --git a/parakeet/frontend/cn_normalization/num.py b/parakeet/frontend/cn_normalization/num.py
index ca6ee80..7cc36d7 100644
--- a/parakeet/frontend/cn_normalization/num.py
+++ b/parakeet/frontend/cn_normalization/num.py
@@ -15,7 +15,6 @@
 Rules to verbalize numbers into Chinese characters.
 https://zh.wikipedia.org/wiki/中文数字#現代中文
 """
-
 import re
 from collections import OrderedDict
 from typing import List
diff --git a/parakeet/frontend/cn_normalization/phonecode.py b/parakeet/frontend/cn_normalization/phonecode.py
index 354e463..437348c 100644
--- a/parakeet/frontend/cn_normalization/phonecode.py
+++ b/parakeet/frontend/cn_normalization/phonecode.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import re
 
 from .num import verbalize_digit
@@ -32,14 +31,12 @@ def phone2str(phone_string: str, mobile=True) -> str:
     if mobile:
         sp_parts = phone_string.strip('+').split()
         result = ''.join(
-            [verbalize_digit(
-                part, alt_one=True) for part in sp_parts])
+            [verbalize_digit(part, alt_one=True) for part in sp_parts])
         return result
     else:
         sil_parts = phone_string.split('-')
         result = ''.join(
-            [verbalize_digit(
-                part, alt_one=True) for part in sil_parts])
+            [verbalize_digit(part, alt_one=True) for part in sil_parts])
         return result
 
 
diff --git a/parakeet/frontend/cn_normalization/quantifier.py b/parakeet/frontend/cn_normalization/quantifier.py
index 0a4bcaf..f40867f 100644
--- a/parakeet/frontend/cn_normalization/quantifier.py
+++ b/parakeet/frontend/cn_normalization/quantifier.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import re
 
 from .num import num2str
diff --git a/parakeet/frontend/cn_normalization/text_normlization.py b/parakeet/frontend/cn_normalization/text_normlization.py
index d55f00a..9bb7fc0 100644
--- a/parakeet/frontend/cn_normalization/text_normlization.py
+++ b/parakeet/frontend/cn_normalization/text_normlization.py
@@ -11,16 +11,37 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import re
 from typing import List
 
-from .chronology import RE_TIME, RE_DATE, RE_DATE2
-from .chronology import replace_time, replace_date, replace_date2
-from .constants import F2H_ASCII_LETTERS, F2H_DIGITS, F2H_SPACE
-from .num import RE_NUMBER, RE_FRAC, RE_PERCENTAGE, RE_RANGE, RE_INTEGER, RE_DEFAULT_NUM, RE_DECIMAL_NUM, RE_POSITIVE_QUANTIFIERS
-from .num import replace_number, replace_frac, replace_percentage, replace_range, replace_default_num, replace_negative_num, replace_positive_quantifier
-from .phonecode import RE_MOBILE_PHONE, RE_TELEPHONE, replace_phone, replace_mobile
+from .chronology import RE_DATE
+from .chronology import RE_DATE2
+from .chronology import RE_TIME
+from .chronology import replace_date
+from .chronology import replace_date2
+from .chronology import replace_time
+from .constants import F2H_ASCII_LETTERS
+from .constants import F2H_DIGITS
+from .constants import F2H_SPACE
+from .num import RE_DECIMAL_NUM
+from .num import RE_DEFAULT_NUM
+from .num import RE_FRAC
+from .num import RE_INTEGER
+from .num import RE_NUMBER
+from .num import RE_PERCENTAGE
+from .num import RE_POSITIVE_QUANTIFIERS
+from .num import RE_RANGE
+from .num import replace_default_num
+from .num import replace_frac
+from .num import replace_negative_num
+from .num import replace_number
+from .num import replace_percentage
+from .num import replace_positive_quantifier
+from .num import replace_range
+from .phonecode import RE_MOBILE_PHONE
+from .phonecode import RE_TELEPHONE
+from .phonecode import replace_mobile
+from .phonecode import replace_phone
 from .quantifier import RE_TEMPERATURE
 from .quantifier import replace_temperature
 
diff --git a/parakeet/frontend/generate_lexicon.py b/parakeet/frontend/generate_lexicon.py
index eae2fde..155e159 100644
--- a/parakeet/frontend/generate_lexicon.py
+++ b/parakeet/frontend/generate_lexicon.py
@@ -18,8 +18,6 @@ than words are used in transcriptions produced by `reorganize_baker.py`.
 We make this choice to better leverage other software for chinese text to 
 pinyin tools like pypinyin. This is the convention for G2P in Chinese.
 """
-
-import argparse
 import re
 from collections import OrderedDict
 
@@ -41,10 +39,10 @@ SPECIALS = ['sil', 'sp']
 def rule(C, V, R, T):
     """Generate a syllable given the initial, the final, erhua indicator, and tone.
     Orthographical rules for pinyin are applied. (special case for y, w, ui, un, iu)
-    
+
     Note that in this system, 'ü' is alway written as 'v' when appeared in phoneme, but converted to 
     'u' in syllables when certain conditions are satisfied.
-    
+
     'i' is distinguished when appeared in phonemes, and separated into 3 categories, 'i', 'ii' and 'iii'.
     Erhua is is possibly applied to every finals, except for finals that already ends with 'r'.
     When a syllable is impossible or does not have any characters with this pronunciation, return None
@@ -86,8 +84,8 @@ def rule(C, V, R, T):
         return None
 
     # ua, uai, uang 不能和 d, t, n, l, r, z, c, s 拼
-    if V in ['ua', 'uai', 'uang'
-             ] and C in ['d', 't', 'n', 'l', 'r', 'z', 'c', 's']:
+    if V in ['ua', 'uai',
+             'uang'] and C in ['d', 't', 'n', 'l', 'r', 'z', 'c', 's']:
         return None
 
     # sh 和 ong 不能拼
diff --git a/parakeet/frontend/normalizer/__init__.py b/parakeet/frontend/normalizer/__init__.py
index 37fd580..abf198b 100644
--- a/parakeet/frontend/normalizer/__init__.py
+++ b/parakeet/frontend/normalizer/__init__.py
@@ -11,6 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from parakeet.frontend.normalizer.normalizer import *
-from parakeet.frontend.normalizer.numbers import *
diff --git a/parakeet/frontend/normalizer/normalizer.py b/parakeet/frontend/normalizer/normalizer.py
index 6f8f5ce..795607e 100644
--- a/parakeet/frontend/normalizer/normalizer.py
+++ b/parakeet/frontend/normalizer/normalizer.py
@@ -11,10 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import re
 import unicodedata
 from builtins import str as unicode
+
 from parakeet.frontend.normalizer.numbers import normalize_numbers
 
 
diff --git a/parakeet/frontend/normalizer/numbers.py b/parakeet/frontend/normalizer/numbers.py
index e693691..564fb9b 100644
--- a/parakeet/frontend/normalizer/numbers.py
+++ b/parakeet/frontend/normalizer/numbers.py
@@ -11,11 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 # number expansion is not that easy
-import inflect
 import re
 
+import inflect
+
 _inflect = inflect.engine()
 _comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])')
 _decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)')
diff --git a/parakeet/frontend/phonectic.py b/parakeet/frontend/phonectic.py
index 1ac0912..81674bf 100644
--- a/parakeet/frontend/phonectic.py
+++ b/parakeet/frontend/phonectic.py
@@ -11,16 +11,18 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from abc import ABC
+from abc import abstractmethod
 
-from abc import ABC, abstractmethod
-from typing import Union
 from g2p_en import G2p
 from g2pM import G2pM
+
 from parakeet.frontend import Vocab
+from parakeet.frontend.normalizer.normalizer import normalize
+from parakeet.frontend.punctuation import get_punctuations
+
 # discard opencc untill we find an easy solution to install it on windows
 # from opencc import OpenCC
-from parakeet.frontend.punctuation import get_punctuations
-from parakeet.frontend.normalizer.normalizer import normalize
 
 __all__ = ["Phonetics", "English", "EnglishCharacter", "Chinese"]
 
@@ -65,14 +67,14 @@ class English(Phonetics):
         start = self.vocab.start_symbol
         end = self.vocab.end_symbol
         phonemes = ([] if start is None else [start]) \
-                 + self.backend(sentence) \
-                 + ([] if end is None else [end])
+                   + self.backend(sentence) \
+                   + ([] if end is None else [end])
         phonemes = [item for item in phonemes if item in self.vocab.stoi]
         return phonemes
 
     def numericalize(self, phonemes):
         """ Convert pronunciation sequence into pronunciation id sequence.
-        
+
         Parameters
         -----------
         phonemes: List[str]
@@ -91,7 +93,7 @@ class English(Phonetics):
 
     def reverse(self, ids):
         """ Reverse the list of pronunciation id sequence to a list of pronunciation sequence.
-        
+
         Parameters
         -----------
         ids: List[int]
@@ -183,7 +185,7 @@ class EnglishCharacter(Phonetics):
         ----------
         str
             The input text sequence.
-        
+
         """
         return [self.vocab.reverse(i) for i in ids]
 
@@ -244,8 +246,8 @@ class Chinese(Phonetics):
         start = self.vocab.start_symbol
         end = self.vocab.end_symbol
         phonemes = ([] if start is None else [start]) \
-                 + phonemes \
-                 + ([] if end is None else [end])
+                   + phonemes \
+                   + ([] if end is None else [end])
         return self._filter_symbols(phonemes)
 
     def _filter_symbols(self, phonemes):
@@ -261,7 +263,7 @@ class Chinese(Phonetics):
 
     def numericalize(self, phonemes):
         """ Convert pronunciation sequence into pronunciation id sequence.
-        
+
         Parameters
         -----------
         phonemes: List[str]
@@ -298,7 +300,7 @@ class Chinese(Phonetics):
 
     def reverse(self, ids):
         """ Reverse the list of pronunciation id sequence to a list of pronunciation sequence.
-        
+
         Parameters
         -----------
         ids: List[int]
diff --git a/parakeet/frontend/pinyin.py b/parakeet/frontend/pinyin.py
index 958fd88..503bfd5 100644
--- a/parakeet/frontend/pinyin.py
+++ b/parakeet/frontend/pinyin.py
@@ -19,13 +19,15 @@ text -> pinyin to other part of a TTS system. Other NLP techniques may be used
 (e.g. tokenization, tagging, NER...)
 """
 import re
+from itertools import product
+
+from pypinyin.contrib.neutral_tone import NeutralToneWith5Mixin
+from pypinyin.core import DefaultConverter
+from pypinyin.core import Pinyin
+from pypinyin.core import Style
+
 from parakeet.frontend.phonectic import Phonetics
 from parakeet.frontend.vocab import Vocab
-import pypinyin
-from pypinyin.core import Pinyin, Style
-from pypinyin.core import DefaultConverter
-from pypinyin.contrib.neutral_tone import NeutralToneWith5Mixin
-from itertools import product
 
 _punctuations = ['，', '。', '？', '！']
 _initials = [
@@ -33,10 +35,10 @@ _initials = [
     'ch', 'sh', 'r', 'z', 'c', 's'
 ]
 _finals = [
-    'ii', 'iii', 'a', 'o', 'e', 'ea', 'ai', 'ei', 'ao', 'ou', 'an', 'en',
-    'ang', 'eng', 'er', 'i', 'ia', 'io', 'ie', 'iai', 'iao', 'iou', 'ian',
-    'ien', 'iang', 'ieng', 'u', 'ua', 'uo', 'uai', 'uei', 'uan', 'uen', 'uang',
-    'ueng', 'v', 've', 'van', 'ven', 'veng'
+    'ii', 'iii', 'a', 'o', 'e', 'ea', 'ai', 'ei', 'ao', 'ou', 'an', 'en', 'ang',
+    'eng', 'er', 'i', 'ia', 'io', 'ie', 'iai', 'iao', 'iou', 'ian', 'ien',
+    'iang', 'ieng', 'u', 'ua', 'uo', 'uai', 'uei', 'uan', 'uen', 'uang', 'ueng',
+    'v', 've', 'van', 'ven', 'veng'
 ]
 _ernized_symbol = ['&r']
 _phones = _initials + _finals + _ernized_symbol + _punctuations
@@ -76,12 +78,12 @@ class ParakeetPinyin(Phonetics):
 
     def phoneticize(self, sentence, add_start_end=False):
         """ Normalize the input text sequence and convert it into pronunciation sequence.
-    
+
         Parameters
         -----------
         sentence: str
             The input text sequence.
-    
+
         Returns
         ----------
         List[str]
@@ -95,12 +97,12 @@ class ParakeetPinyin(Phonetics):
 
     def numericalize(self, phonemes, tones):
         """ Convert pronunciation sequence into pronunciation id sequence.
-        
+
         Parameters
         -----------
         phonemes: List[str]
             The list of pronunciation sequence.
-    
+
         Returns
         ----------
         List[int]
@@ -112,12 +114,12 @@ class ParakeetPinyin(Phonetics):
 
     def __call__(self, sentence, add_start_end=False):
         """ Convert the input text sequence into pronunciation id sequence.
-    
+
         Parameters
         -----------
         sentence: str
             The input text sequence.
-    
+
         Returns
         ----------
         List[str]
@@ -159,12 +161,12 @@ class ParakeetPinyinWithTone(Phonetics):
 
     def phoneticize(self, sentence, add_start_end=False):
         """ Normalize the input text sequence and convert it into pronunciation sequence.
-    
+
         Parameters
         -----------
         sentence: str
             The input text sequence.
-    
+
         Returns
         ----------
         List[str]
@@ -178,12 +180,12 @@ class ParakeetPinyinWithTone(Phonetics):
 
     def numericalize(self, phonemes):
         """ Convert pronunciation sequence into pronunciation id sequence.
-        
+
         Parameters
         -----------
         phonemes: List[str]
             The list of pronunciation sequence.
-    
+
         Returns
         ----------
         List[int]
@@ -194,12 +196,12 @@ class ParakeetPinyinWithTone(Phonetics):
 
     def __call__(self, sentence, add_start_end=False):
         """ Convert the input text sequence into pronunciation id sequence.
-    
+
         Parameters
         -----------
         sentence: str
             The input text sequence.
-    
+
         Returns
         ----------
         List[str]
@@ -232,17 +234,17 @@ def _convert_to_parakeet_convension(syllable):
     syllable = syllable.replace("ing", "ieng").replace("in", "ien")
 
     # expansion for un, ui, iu
-    syllable = syllable.replace("un","uen")\
-        .replace("ui", "uei")\
+    syllable = syllable.replace("un", "uen") \
+        .replace("ui", "uei") \
         .replace("iu", "iou")
 
     # rule for variants of i
-    syllable = syllable.replace("zi", "zii")\
-        .replace("ci", "cii")\
-        .replace("si", "sii")\
-        .replace("zhi", "zhiii")\
-        .replace("chi", "chiii")\
-        .replace("shi", "shiii")\
+    syllable = syllable.replace("zi", "zii") \
+        .replace("ci", "cii") \
+        .replace("si", "sii") \
+        .replace("zhi", "zhiii") \
+        .replace("chi", "chiii") \
+        .replace("shi", "shiii") \
         .replace("ri", "riii")
 
     # rule for y preceding i, u
@@ -252,8 +254,8 @@ def _convert_to_parakeet_convension(syllable):
     syllable = syllable.replace("wu", "u").replace("w", "u")
 
     # rule for v following j, q, x
-    syllable = syllable.replace("ju", "jv")\
-        .replace("qu", "qv")\
+    syllable = syllable.replace("ju", "jv") \
+        .replace("qu", "qv") \
         .replace("xu", "xv")
 
     return syllable + tone
diff --git a/parakeet/frontend/punctuation.py b/parakeet/frontend/punctuation.py
index 099e759..23636dc 100644
--- a/parakeet/frontend/punctuation.py
+++ b/parakeet/frontend/punctuation.py
@@ -12,9 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import abc
-import string
-
 __all__ = ["get_punctuations"]
 
 EN_PUNCT = [
diff --git a/parakeet/frontend/tone_sandhi.py b/parakeet/frontend/tone_sandhi.py
index 9dc3917..268a160 100644
--- a/parakeet/frontend/tone_sandhi.py
+++ b/parakeet/frontend/tone_sandhi.py
@@ -11,8 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from typing import List, Tuple
+from typing import List
+from typing import Tuple
 
 import jieba
 from pypinyin import lazy_pinyin
@@ -76,8 +76,7 @@ class ToneSandhi():
 
         # reduplication words for n. and v. e.g. 奶奶, 试试, 旺旺
         for j, item in enumerate(word):
-            if j - 1 >= 0 and item == word[j - 1] and pos[
-                    0] in {"n", "v", "a"}:
+            if j - 1 >= 0 and item == word[j - 1] and pos[0] in {"n", "v", "a"}:
                 finals[j] = finals[j][:-1] + "5"
         ge_idx = word.find("个")
         if len(word) >= 1 and word[-1] in "吧呢哈啊呐噻嘛吖嗨呐哦哒额滴哩哟喽啰耶喔诶":
@@ -125,8 +124,8 @@ class ToneSandhi():
         else:
             for i, char in enumerate(word):
                 # "不" before tone4 should be bu2, e.g. 不怕
-                if char == "不" and i + 1 < len(word) and finals[i + 1][
-                        -1] == "4":
+                if char == "不" and i + 1 < len(word) and finals[i +
+                                                                1][-1] == "4":
                     finals[i] = finals[i][:-1] + "2"
         return finals
 
@@ -266,12 +265,12 @@ class ToneSandhi():
         assert len(sub_finals_list) == len(seg)
         merge_last = [False] * len(seg)
         for i, (word, pos) in enumerate(seg):
-            if i - 1 >= 0 and self._all_tone_three(sub_finals_list[
-                    i - 1]) and self._all_tone_three(sub_finals_list[
-                        i]) and not merge_last[i - 1]:
+            if i - 1 >= 0 and self._all_tone_three(
+                    sub_finals_list[i - 1]) and self._all_tone_three(
+                        sub_finals_list[i]) and not merge_last[i - 1]:
                 # if the last word is reduplication, not merge, because reduplication need to be _neural_sandhi
-                if not self._is_reduplication(seg[i - 1][0]) and len(seg[
-                        i - 1][0]) + len(seg[i][0]) <= 3:
+                if not self._is_reduplication(seg[i - 1][0]) and len(
+                        seg[i - 1][0]) + len(seg[i][0]) <= 3:
                     new_seg[-1][0] = new_seg[-1][0] + seg[i][0]
                     merge_last[i] = True
                 else:
@@ -299,8 +298,8 @@ class ToneSandhi():
             if i - 1 >= 0 and sub_finals_list[i - 1][-1][-1] == "3" and sub_finals_list[i][0][-1] == "3" and not \
                     merge_last[i - 1]:
                 # if the last word is reduplication, not merge, because reduplication need to be _neural_sandhi
-                if not self._is_reduplication(seg[i - 1][0]) and len(seg[
-                        i - 1][0]) + len(seg[i][0]) <= 3:
+                if not self._is_reduplication(seg[i - 1][0]) and len(
+                        seg[i - 1][0]) + len(seg[i][0]) <= 3:
                     new_seg[-1][0] = new_seg[-1][0] + seg[i][0]
                     merge_last[i] = True
                 else:
diff --git a/parakeet/frontend/vocab.py b/parakeet/frontend/vocab.py
index a56cfb8..9ef6b13 100644
--- a/parakeet/frontend/vocab.py
+++ b/parakeet/frontend/vocab.py
@@ -11,9 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from typing import Dict, Iterable, List
 from collections import OrderedDict
+from typing import Iterable
 
 __all__ = ["Vocab"]
 
@@ -25,13 +24,13 @@ class Vocab(object):
     -----------
     symbols: Iterable[str]
         Common symbols.
-    
+
     padding_symbol: str, optional
         Symbol for pad. Defaults to "<pad>".
 
     unk_symbol: str, optional
         Symbol for unknow. Defaults to "<unk>"
-    
+
     start_symbol: str, optional
         Symbol for start. Defaults to "<s>"
 
diff --git a/parakeet/models/__init__.py b/parakeet/models/__init__.py
index 6cf65ec..abf198b 100644
--- a/parakeet/models/__init__.py
+++ b/parakeet/models/__init__.py
@@ -11,13 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-#from parakeet.models.clarinet import *
-from parakeet.models.waveflow import *
-#from parakeet.models.wavenet import *
-
-from parakeet.models.transformer_tts import *
-#from parakeet.models.deepvoice3 import *
-# from parakeet.models.fastspeech import *
-from parakeet.models.tacotron2 import *
-from parakeet.models.fastspeech2 import *
diff --git a/parakeet/models/fastspeech2.py b/parakeet/models/fastspeech2.py
index c351e92..daaba74 100644
--- a/parakeet/models/fastspeech2.py
+++ b/parakeet/models/fastspeech2.py
@@ -12,19 +12,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Fastspeech2 related modules for paddle"""
-
-from typing import Dict, Sequence, Tuple
+from typing import Sequence
+from typing import Tuple
 
 import paddle
 from paddle import nn
-from parakeet.modules.fastspeech2_predictor.duration_predictor import DurationPredictor, DurationPredictorLoss
+from typeguard import check_argument_types
+
+from parakeet.modules.fastspeech2_predictor.duration_predictor import DurationPredictor
+from parakeet.modules.fastspeech2_predictor.duration_predictor import DurationPredictorLoss
 from parakeet.modules.fastspeech2_predictor.length_regulator import LengthRegulator
 from parakeet.modules.fastspeech2_predictor.postnet import Postnet
 from parakeet.modules.fastspeech2_predictor.variance_predictor import VariancePredictor
-from parakeet.modules.fastspeech2_transformer.embedding import PositionalEncoding, ScaledPositionalEncoding
+from parakeet.modules.fastspeech2_transformer.embedding import PositionalEncoding
+from parakeet.modules.fastspeech2_transformer.embedding import ScaledPositionalEncoding
 from parakeet.modules.fastspeech2_transformer.encoder import Encoder as TransformerEncoder
-from parakeet.modules.nets_utils import initialize, make_non_pad_mask, make_pad_mask
-from typeguard import check_argument_types
+from parakeet.modules.nets_utils import initialize
+from parakeet.modules.nets_utils import make_non_pad_mask
+from parakeet.modules.nets_utils import make_pad_mask
 
 
 class FastSpeech2(nn.Layer):
@@ -293,9 +298,8 @@ class FastSpeech2(nn.Layer):
             xs, ilens, ys, olens, ds, ps, es, is_inference=False)
         # modify mod part of groundtruth
         if self.reduction_factor > 1:
-            olens = paddle.to_tensor([
-                olen - olen % self.reduction_factor for olen in olens.numpy()
-            ])
+            olens = paddle.to_tensor(
+                [olen - olen % self.reduction_factor for olen in olens.numpy()])
             max_olen = max(olens)
             ys = ys[:, :max_olen]
 
@@ -501,8 +505,7 @@ class FastSpeech2Inference(nn.Layer):
 class FastSpeech2Loss(nn.Layer):
     """Loss function module for FastSpeech2."""
 
-    def __init__(self,
-                 use_masking: bool=True,
+    def __init__(self, use_masking: bool=True,
                  use_weighted_masking: bool=False):
         """Initialize feed-forward Transformer loss module.
 
@@ -538,8 +541,8 @@ class FastSpeech2Loss(nn.Layer):
             ps: paddle.Tensor,
             es: paddle.Tensor,
             ilens: paddle.Tensor,
-            olens: paddle.Tensor, ) -> Tuple[paddle.Tensor, paddle.Tensor,
-                                             paddle.Tensor, paddle.Tensor]:
+            olens: paddle.Tensor,
+    ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor, paddle.Tensor]:
         """Calculate forward propagation.
 
         Parameters
@@ -611,9 +614,9 @@ class FastSpeech2Loss(nn.Layer):
         # make weighted mask and apply it
         if self.use_weighted_masking:
             out_masks = make_non_pad_mask(olens).unsqueeze(-1)
-            out_weights = out_masks.cast(
-                dtype=paddle.float32) / out_masks.cast(
-                    dtype=paddle.float32).sum(axis=1, keepdim=True)
+            out_weights = out_masks.cast(dtype=paddle.float32) / out_masks.cast(
+                dtype=paddle.float32).sum(
+                    axis=1, keepdim=True)
             out_weights /= ys.shape[0] * ys.shape[2]
             duration_masks = make_non_pad_mask(ilens)
             duration_weights = (duration_masks.cast(dtype=paddle.float32) /
diff --git a/parakeet/models/lstm_speaker_encoder.py b/parakeet/models/lstm_speaker_encoder.py
index 529f27b..3372b21 100644
--- a/parakeet/models/lstm_speaker_encoder.py
+++ b/parakeet/models/lstm_speaker_encoder.py
@@ -11,17 +11,14 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import numpy as np
 import paddle
 from paddle import nn
-from paddle.fluid.param_attr import ParamAttr
 from paddle.nn import functional as F
 from paddle.nn import initializer as I
-
 from scipy.interpolate import interp1d
-from sklearn.metrics import roc_curve
 from scipy.optimize import brentq
+from sklearn.metrics import roc_curve
 
 
 class LSTMSpeakerEncoder(nn.Layer):
@@ -81,8 +78,7 @@ class LSTMSpeakerEncoder(nn.Layer):
         # print("p1: ", p1.shape)
         p2 = paddle.bmm(
             embeds.reshape([-1, 1, embed_dim]),
-            normalized_centroids_excl.reshape(
-                [-1, embed_dim, 1]))  # (NM, 1, 1)
+            normalized_centroids_excl.reshape([-1, embed_dim, 1]))  # (NM, 1, 1)
         p2 = p2.reshape([-1])  # （NM)
 
         # begin: alternative implementation for scatter
@@ -94,9 +90,8 @@ class LSTMSpeakerEncoder(nn.Layer):
             index = index * speakers_per_batch + paddle.arange(
                 0, speakers_per_batch, dtype="int64").unsqueeze(-1)
             index = paddle.reshape(index, [-1])
-        ones = paddle.ones([
-            speakers_per_batch * utterances_per_speaker * speakers_per_batch
-        ])
+        ones = paddle.ones(
+            [speakers_per_batch * utterances_per_speaker * speakers_per_batch])
         zeros = paddle.zeros_like(index, dtype=ones.dtype)
         mask_p1 = paddle.scatter(ones, index, zeros)
         p = p1 * mask_p1 + (1 - mask_p1) * paddle.scatter(ones, index, p2)
@@ -113,6 +108,9 @@ class LSTMSpeakerEncoder(nn.Layer):
             g = p._grad_ivar()
             g[...] = g * 0.01
 
+    def inv_argmax(self, i, num):
+        return np.eye(1, num, i, dtype=np.int)[0]
+
     def loss(self, embeds):
         """
         Computes the softmax loss according the section 2.1 of GE2E.
@@ -138,8 +136,8 @@ class LSTMSpeakerEncoder(nn.Layer):
         # EER (not backpropagated)
         with paddle.no_grad():
             ground_truth = target.numpy()
-            inv_argmax = lambda i: np.eye(1, speakers_per_batch, i, dtype=np.int)[0]
-            labels = np.array([inv_argmax(i) for i in ground_truth])
+            labels = np.array(
+                [self.inv_argmax(i, speakers_per_batch) for i in ground_truth])
             preds = sim_matrix.numpy()
 
             # Snippet from https://yangcha.github.io/EER-ROC/
diff --git a/parakeet/models/parallel_wavegan.py b/parakeet/models/parallel_wavegan.py
index cd4539f..bb21465 100644
--- a/parakeet/models/parallel_wavegan.py
+++ b/parakeet/models/parallel_wavegan.py
@@ -11,13 +11,14 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import math
-from typing import List, Dict, Any, Union, Optional, Tuple
+from typing import Any
+from typing import Dict
+from typing import List
+from typing import Optional
 
 import numpy as np
 import paddle
-from paddle import Tensor
 from paddle import nn
 from paddle.nn import functional as F
 
@@ -63,8 +64,8 @@ class Stretch2D(nn.Layer):
 
 
 class UpsampleNet(nn.Layer):
-    """A Layer to upsample spectrogram by applying consecutive stretch and 
-    convolutions. 
+    """A Layer to upsample spectrogram by applying consecutive stretch and
+    convolutions.
 
     Parameters
     ----------
@@ -81,10 +82,10 @@ class UpsampleNet(nn.Layer):
     use_causal_conv : bool, optional
         Whether to use causal padding before convolution, by default False
 
-        If True, Causal padding is used along the time axis, i.e. padding 
-        amount is ``receptive field - 1`` and 0 for before and after, 
+        If True, Causal padding is used along the time axis, i.e. padding
+        amount is ``receptive field - 1`` and 0 for before and after,
         respectively.
-        
+
         If False, "same" padding is used along the time axis.
     """
 
@@ -158,7 +159,7 @@ class ConvInUpsampleNet(nn.Layer):
     aux_context_window : int, optional
         Context window of the first 1D convolution applied to the input. It 
         related to the kernel size of the convolution, by default 0
-        
+
         If use causal convolution, the kernel size is ``window + 1``, else
         the kernel size is ``2 * window + 1``.
     use_causal_conv : bool, optional
@@ -167,7 +168,7 @@ class ConvInUpsampleNet(nn.Layer):
         If True, Causal padding is used along the time axis, i.e. padding 
         amount is ``receptive field - 1`` and 0 for before and after, 
         respectively.
-        
+
         If False, "same" padding is used along the time axis.
     """
 
@@ -276,10 +277,7 @@ class ResidualBlock(nn.Layer):
 
         gate_out_channels = gate_channels // 2
         self.conv1x1_out = nn.Conv1D(
-            gate_out_channels,
-            residual_channels,
-            kernel_size=1,
-            bias_attr=bias)
+            gate_out_channels, residual_channels, kernel_size=1, bias_attr=bias)
         self.conv1x1_skip = nn.Conv1D(
             gate_out_channels, skip_channels, kernel_size=1, bias_attr=bias)
 
@@ -428,13 +426,18 @@ class PWGGenerator(nn.Layer):
                 use_causal_conv=use_causal_conv)
             self.conv_layers.append(conv)
 
-        self.last_conv_layers = nn.Sequential(
-            nn.ReLU(),
-            nn.Conv1D(
-                skip_channels, skip_channels, 1, bias_attr=True),
-            nn.ReLU(),
-            nn.Conv1D(
-                skip_channels, out_channels, 1, bias_attr=True))
+        self.last_conv_layers = nn.Sequential(nn.ReLU(),
+                                              nn.Conv1D(
+                                                  skip_channels,
+                                                  skip_channels,
+                                                  1,
+                                                  bias_attr=True),
+                                              nn.ReLU(),
+                                              nn.Conv1D(
+                                                  skip_channels,
+                                                  out_channels,
+                                                  1,
+                                                  bias_attr=True))
 
         if use_weight_norm:
             self.apply_weight_norm()
@@ -548,18 +551,18 @@ class PWGDiscriminator(nn.Layer):
         by default True
     """
 
-    def __init__(self,
-                 in_channels: int=1,
-                 out_channels: int=1,
-                 kernel_size: int=3,
-                 layers: int=10,
-                 conv_channels: int=64,
-                 dilation_factor: int=1,
-                 nonlinear_activation: str="LeakyReLU",
-                 nonlinear_activation_params: Dict[
-                     str, Any]={"negative_slope": 0.2},
-                 bias: bool=True,
-                 use_weight_norm: bool=True):
+    def __init__(
+            self,
+            in_channels: int=1,
+            out_channels: int=1,
+            kernel_size: int=3,
+            layers: int=10,
+            conv_channels: int=64,
+            dilation_factor: int=1,
+            nonlinear_activation: str="LeakyReLU",
+            nonlinear_activation_params: Dict[str, Any]={"negative_slope": 0.2},
+            bias: bool=True,
+            use_weight_norm: bool=True):
         super().__init__()
         assert kernel_size % 2 == 1
         assert dilation_factor > 0
@@ -693,8 +696,7 @@ class ResidualPWGDiscriminator(nn.Layer):
         layers_per_stack = layers // stacks
 
         self.first_conv = nn.Sequential(
-            nn.Conv1D(
-                in_channels, residual_channels, 1, bias_attr=True),
+            nn.Conv1D(in_channels, residual_channels, 1, bias_attr=True),
             getattr(nn, nonlinear_activation)(**nonlinear_activation_params))
 
         self.conv_layers = nn.LayerList()
@@ -714,11 +716,9 @@ class ResidualPWGDiscriminator(nn.Layer):
 
         self.last_conv_layers = nn.Sequential(
             getattr(nn, nonlinear_activation)(**nonlinear_activation_params),
-            nn.Conv1D(
-                skip_channels, skip_channels, 1, bias_attr=True),
+            nn.Conv1D(skip_channels, skip_channels, 1, bias_attr=True),
             getattr(nn, nonlinear_activation)(**nonlinear_activation_params),
-            nn.Conv1D(
-                skip_channels, out_channels, 1, bias_attr=True))
+            nn.Conv1D(skip_channels, out_channels, 1, bias_attr=True))
 
         if use_weight_norm:
             self.apply_weight_norm()
diff --git a/parakeet/models/speedyspeech.py b/parakeet/models/speedyspeech.py
index bd7055b..e98e633 100644
--- a/parakeet/models/speedyspeech.py
+++ b/parakeet/models/speedyspeech.py
@@ -11,18 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-import math
-
-import numpy as np
 import paddle
-from paddle import Tensor
 from paddle import nn
-from paddle.nn import functional as F
-from paddle.nn import initializer as I
 
-from parakeet.modules.positional_encoding import sinusoid_position_encoding
 from parakeet.modules.expansion import expand
+from parakeet.modules.positional_encoding import sinusoid_position_encoding
 
 
 class ResidualBlock(nn.Layer):
@@ -38,8 +31,7 @@ class ResidualBlock(nn.Layer):
                     padding="same",
                     data_format="NLC"),
                 nn.ReLU(),
-                nn.BatchNorm1D(
-                    channels, data_format="NLC"), ) for _ in range(n)
+                nn.BatchNorm1D(channels, data_format="NLC"), ) for _ in range(n)
         ]
         self.blocks = nn.Sequential(*blocks)
 
@@ -95,16 +87,14 @@ class SpeedySpeechEncoder(nn.Layer):
             nn.Linear(hidden_size, hidden_size),
             nn.ReLU(), )
         res_blocks = [
-            ResidualBlock(
-                hidden_size, kernel_size, d, n=2) for d in dilations
+            ResidualBlock(hidden_size, kernel_size, d, n=2) for d in dilations
         ]
         self.res_blocks = nn.Sequential(*res_blocks)
 
         self.postnet1 = nn.Sequential(nn.Linear(hidden_size, hidden_size))
         self.postnet2 = nn.Sequential(
             nn.ReLU(),
-            nn.BatchNorm1D(
-                hidden_size, data_format="NLC"),
+            nn.BatchNorm1D(hidden_size, data_format="NLC"),
             nn.Linear(hidden_size, hidden_size), )
 
     def forward(self, text, tones):
@@ -120,13 +110,9 @@ class DurationPredictor(nn.Layer):
     def __init__(self, hidden_size):
         super().__init__()
         self.layers = nn.Sequential(
-            ResidualBlock(
-                hidden_size, 4, 1, n=1),
-            ResidualBlock(
-                hidden_size, 3, 1, n=1),
-            ResidualBlock(
-                hidden_size, 1, 1, n=1),
-            nn.Linear(hidden_size, 1))
+            ResidualBlock(hidden_size, 4, 1, n=1),
+            ResidualBlock(hidden_size, 3, 1, n=1),
+            ResidualBlock(hidden_size, 1, 1, n=1), nn.Linear(hidden_size, 1))
 
     def forward(self, x):
         return paddle.squeeze(self.layers(x), -1)
@@ -136,15 +122,13 @@ class SpeedySpeechDecoder(nn.Layer):
     def __init__(self, hidden_size, output_size, kernel_size, dilations):
         super().__init__()
         res_blocks = [
-            ResidualBlock(
-                hidden_size, kernel_size, d, n=2) for d in dilations
+            ResidualBlock(hidden_size, kernel_size, d, n=2) for d in dilations
         ]
         self.res_blocks = nn.Sequential(*res_blocks)
 
         self.postnet1 = nn.Sequential(nn.Linear(hidden_size, hidden_size))
         self.postnet2 = nn.Sequential(
-            ResidualBlock(
-                hidden_size, kernel_size, 1, n=2),
+            ResidualBlock(hidden_size, kernel_size, 1, n=2),
             nn.Linear(hidden_size, output_size))
 
     def forward(self, x):
diff --git a/parakeet/models/tacotron2.py b/parakeet/models/tacotron2.py
index 5b18aab..ab94faf 100644
--- a/parakeet/models/tacotron2.py
+++ b/parakeet/models/tacotron2.py
@@ -11,20 +11,19 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import math
 
 import paddle
 from paddle import nn
+from paddle.fluid.layers import sequence_mask
 from paddle.nn import functional as F
 from paddle.nn import initializer as I
-from paddle.fluid.layers import sequence_mask
+from tqdm import trange
 
-from parakeet.modules.conv import Conv1dBatchNorm
 from parakeet.modules.attention import LocationSensitiveAttention
+from parakeet.modules.conv import Conv1dBatchNorm
 from parakeet.modules.losses import guided_attention_loss
 from parakeet.utils import checkpoint
-from tqdm import trange
 
 __all__ = ["Tacotron2", "Tacotron2Loss"]
 
@@ -74,8 +73,7 @@ class DecoderPreNet(nn.Layer):
 
         """
 
-        x = F.dropout(
-            F.relu(self.linear1(x)), self.dropout_rate, training=True)
+        x = F.dropout(F.relu(self.linear1(x)), self.dropout_rate, training=True)
         output = F.dropout(
             F.relu(self.linear2(x)), self.dropout_rate, training=True)
         return output
@@ -745,10 +743,10 @@ class Tacotron2(nn.Layer):
 
         if global_condition is not None:
             global_condition = global_condition.unsqueeze(1)
-            global_condition = paddle.expand(
-                global_condition, [-1, encoder_outputs.shape[1], -1])
-            encoder_outputs = paddle.concat(
-                [encoder_outputs, global_condition], -1)
+            global_condition = paddle.expand(global_condition,
+                                             [-1, encoder_outputs.shape[1], -1])
+            encoder_outputs = paddle.concat([encoder_outputs, global_condition],
+                                            -1)
 
         # [B, T_enc, 1]
         mask = sequence_mask(
@@ -813,10 +811,10 @@ class Tacotron2(nn.Layer):
 
         if global_condition is not None:
             global_condition = global_condition.unsqueeze(1)
-            global_condition = paddle.expand(
-                global_condition, [-1, encoder_outputs.shape[1], -1])
-            encoder_outputs = paddle.concat(
-                [encoder_outputs, global_condition], -1)
+            global_condition = paddle.expand(global_condition,
+                                             [-1, encoder_outputs.shape[1], -1])
+            encoder_outputs = paddle.concat([encoder_outputs, global_condition],
+                                            -1)
         if self.decoder.use_stop_token:
             mel_outputs, alignments, stop_logits = self.decoder.infer(
                 encoder_outputs, max_decoder_steps=max_decoder_steps)
diff --git a/parakeet/models/transformer_tts.py b/parakeet/models/transformer_tts.py
index eed1fbe..4ec943a 100644
--- a/parakeet/models/transformer_tts.py
+++ b/parakeet/models/transformer_tts.py
@@ -11,22 +11,26 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import math
-from tqdm import trange
+
 import paddle
 from paddle import nn
 from paddle.nn import functional as F
 from paddle.nn import initializer as I
+from tqdm import trange
 
 import parakeet
-from parakeet.modules.attention import _split_heads, _concat_heads, drop_head, scaled_dot_product_attention
-from parakeet.modules.transformer import PositionwiseFFN
-from parakeet.modules import masking
-from parakeet.modules.conv import Conv1dBatchNorm
-from parakeet.modules import positional_encoding as pe
 from parakeet.modules import losses as L
-from parakeet.utils import checkpoint, scheduler
+from parakeet.modules import masking
+from parakeet.modules import positional_encoding as pe
+from parakeet.modules.attention import _concat_heads
+from parakeet.modules.attention import _split_heads
+from parakeet.modules.attention import drop_head
+from parakeet.modules.attention import scaled_dot_product_attention
+from parakeet.modules.conv import Conv1dBatchNorm
+from parakeet.modules.transformer import PositionwiseFFN
+from parakeet.utils import checkpoint
+from parakeet.utils import scheduler
 
 __all__ = ["TransformerTTS", "TransformerTTSLoss"]
 
@@ -404,16 +408,14 @@ class TransformerTTS(nn.Layer):
             self.toned = False
         # position encoding matrix may be extended later
         self.encoder_pe = pe.sinusoid_position_encoding(1000, d_encoder)
-        self.encoder_pe_scalar = self.create_parameter(
-            [1], attr=I.Constant(1.))
+        self.encoder_pe_scalar = self.create_parameter([1], attr=I.Constant(1.))
         self.encoder = TransformerEncoder(d_encoder, n_heads, d_ffn,
                                           encoder_layers, dropout)
 
         # decoder
         self.decoder_prenet = MLPPreNet(d_mel, d_prenet, d_decoder, dropout)
         self.decoder_pe = pe.sinusoid_position_encoding(1000, d_decoder)
-        self.decoder_pe_scalar = self.create_parameter(
-            [1], attr=I.Constant(1.))
+        self.decoder_pe_scalar = self.create_parameter([1], attr=I.Constant(1.))
         self.decoder = TransformerDecoder(
             d_decoder,
             n_heads,
@@ -470,14 +472,13 @@ class TransformerTTS(nn.Layer):
             self.encoder_pe = pe.sinusoid_position_encoding(new_T,
                                                             self.d_encoder)
         pos_enc = self.encoder_pe[:T_enc, :]  # (T, C)
-        x = embed.scale(math.sqrt(
-            self.d_encoder)) + pos_enc * self.encoder_pe_scalar
+        x = embed.scale(
+            math.sqrt(self.d_encoder)) + pos_enc * self.encoder_pe_scalar
         x = F.dropout(x, self.dropout, training=self.training)
 
         # TODO(chenfeiyu): unsqueeze a decoder_time_steps=1 for the mask
         encoder_padding_mask = paddle.unsqueeze(
-            masking.id_mask(
-                text, self.padding_idx, dtype=x.dtype), 1)
+            masking.id_mask(text, self.padding_idx, dtype=x.dtype), 1)
         x, attention_weights = self.encoder(x, encoder_padding_mask,
                                             self.drop_n_heads)
         return x, attention_weights, encoder_padding_mask
@@ -492,8 +493,8 @@ class TransformerTTS(nn.Layer):
             self.decoder_pe = pe.sinusoid_position_encoding(new_T,
                                                             self.d_decoder)
         pos_enc = self.decoder_pe[:T_dec * self.r:self.r, :]
-        x = x.scale(math.sqrt(
-            self.d_decoder)) + pos_enc * self.decoder_pe_scalar
+        x = x.scale(
+            math.sqrt(self.d_decoder)) + pos_enc * self.decoder_pe_scalar
         x = F.dropout(x, self.dropout, training=self.training)
 
         no_future_mask = masking.future_mask(T_dec, dtype=input.dtype)
@@ -547,9 +548,8 @@ class TransformerTTS(nn.Layer):
             # stop condition: (if any ouput frame of the output multiframes hits the stop condition)
             # import pdb; pdb.set_trace()
             if paddle.any(
-                    paddle.argmax(
-                        stop_logits[0, -self.r:, :], axis=-1) ==
-                    self.stop_prob_index):
+                    paddle.argmax(stop_logits[0, -self.r:, :],
+                                  axis=-1) == self.stop_prob_index):
                 if verbose:
                     print("Hits stop condition.")
                 break
@@ -602,8 +602,7 @@ class TransformerTTSLoss(nn.Layer):
 
     def forward(self, mel_output, mel_intermediate, mel_target, stop_logits,
                 stop_probs):
-        mask = masking.feature_mask(
-            mel_target, axis=-1, dtype=mel_target.dtype)
+        mask = masking.feature_mask(mel_target, axis=-1, dtype=mel_target.dtype)
         mask1 = paddle.unsqueeze(mask, -1)
         mel_loss1 = L.masked_l1_loss(mel_output, mel_target, mask1)
         mel_loss2 = L.masked_l1_loss(mel_intermediate, mel_target, mask1)
diff --git a/parakeet/models/waveflow.py b/parakeet/models/waveflow.py
index e274cef..b6317bf 100644
--- a/parakeet/models/waveflow.py
+++ b/parakeet/models/waveflow.py
@@ -11,10 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-import time
 import math
-from typing import List, Union, Tuple
+import time
+from typing import List
+from typing import Tuple
+from typing import Union
 
 import numpy as np
 import paddle
@@ -22,8 +23,8 @@ from paddle import nn
 from paddle.nn import functional as F
 from paddle.nn import initializer as I
 
-from parakeet.utils import checkpoint
 from parakeet.modules import geometry as geo
+from parakeet.utils import checkpoint
 
 __all__ = ["WaveFlow", "ConditionalWaveFlow", "WaveFlowLoss"]
 
@@ -120,7 +121,7 @@ class UpsampleNet(nn.LayerList):
         If trim_conv_artifact is ``True``, the output time steps is less
         than ``time_steps \* upsample_factors``.
         """
-        x = paddle.unsqueeze(x, 1)  #(B, C, T) -> (B, 1, C, T)
+        x = paddle.unsqueeze(x, 1)  # (B, C, T) -> (B, 1, C, T)
         for layer in self:
             x = layer(x)
             if trim_conv_artifact:
@@ -795,7 +796,7 @@ class ConditionalWaveFlow(nn.LayerList):
             The synthesized audio, where``T <= T_mel \* upsample_factors``.
         """
         start = time.time()
-        condition = self.encoder(mel, trim_conv_artifact=True)  #(B, C, T)
+        condition = self.encoder(mel, trim_conv_artifact=True)  # (B, C, T)
         batch_size, _, time_steps = condition.shape
         z = paddle.randn([batch_size, time_steps], dtype=mel.dtype)
         x = self.decoder.inverse(z, condition)
@@ -893,12 +894,12 @@ class WaveFlowLoss(nn.Layer):
 class ConditionalWaveFlow2Infer(ConditionalWaveFlow):
     def forward(self, mel):
         """Generate raw audio given mel spectrogram.
-     
+
         Parameters
         ----------
         mel : np.ndarray [shape=(C_mel, T_mel)]
-            Mel spectrogram of an utterance(in log-magnitude). 
- 
+            Mel spectrogram of an utterance(in log-magnitude).
+
         Returns
         -------
         np.ndarray [shape=(T,)]
diff --git a/parakeet/modules/__init__.py b/parakeet/modules/__init__.py
index 327cb7b..abf198b 100644
--- a/parakeet/modules/__init__.py
+++ b/parakeet/modules/__init__.py
@@ -11,11 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from parakeet.modules.attention import *
-from parakeet.modules.conv import *
-from parakeet.modules.geometry import *
-from parakeet.modules.losses import *
-from parakeet.modules.masking import *
-from parakeet.modules.positional_encoding import *
-from parakeet.modules.transformer import *
diff --git a/parakeet/modules/attention.py b/parakeet/modules/attention.py
index e91ea74..154625c 100644
--- a/parakeet/modules/attention.py
+++ b/parakeet/modules/attention.py
@@ -11,19 +11,15 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import math
+
 import numpy as np
 import paddle
 from paddle import nn
 from paddle.nn import functional as F
 
 
-def scaled_dot_product_attention(q,
-                                 k,
-                                 v,
-                                 mask=None,
-                                 dropout=0.0,
+def scaled_dot_product_attention(q, k, v, mask=None, dropout=0.0,
                                  training=True):
     r"""Scaled dot product attention with masking. 
     
@@ -33,24 +29,19 @@ def scaled_dot_product_attention(q,
 
     Parameters
     -----------
-    
     q : Tensor [shape=(\*, T_q, d)]
         the query tensor.
-        
     k : Tensor [shape=(\*, T_k, d)]
         the key tensor.
-        
     v : Tensor [shape=(\*, T_k, d_v)]
         the value tensor.
-        
     mask : Tensor, [shape=(\*, T_q, T_k) or broadcastable shape], optional
         the mask tensor, zeros correspond to paddings. Defaults to None.
-    
+
     Returns
     ----------
-    out : Tensor [shape=(\*, T_q, d_v)] 
+    out : Tensor [shape=(\*, T_q, d_v)]
         the context vector.
-
     attn_weights : Tensor [shape=(\*, T_q, T_k)]
         the attention weights.
     """
@@ -74,10 +65,8 @@ def drop_head(x, drop_n_heads, training=True):
     ----------
     x : Tensor [shape=(batch_size, num_heads, time_steps, channels)]
         The input, multiple context vectors.
-        
     drop_n_heads : int [0<= drop_n_heads <= num_heads]
         Number of vectors to drop.
-        
     training : bool
         A flag indicating whether it is in training. If `False`, no dropout is 
         applied.
@@ -127,17 +116,14 @@ class MonoheadAttention(nn.Layer):
     ----------
     model_dim : int
         Feature size of the query.
-        
     dropout : float, optional
-        Dropout probability of scaled dot product attention and final context 
+        Dropout probability of scaled dot product attention and final context
         vector. Defaults to 0.0.
-        
     k_dim : int, optional
-        Feature size of the key of each scaled dot product attention. If not 
+        Feature size of the key of each scaled dot product attention. If not
         provided, it is set to `model_dim / num_heads`. Defaults to None.
-        
     v_dim : int, optional
-        Feature size of the key of each scaled dot product attention. If not 
+        Feature size of the value of each scaled dot product attention. If not
         provided, it is set to `model_dim / num_heads`. Defaults to None.
     """
 
@@ -162,23 +148,19 @@ class MonoheadAttention(nn.Layer):
         
         Parameters
         -----------
-        q : Tensor [shape=(batch_size, time_steps_q, model_dim)] 
+        q : Tensor [shape=(batch_size, time_steps_q, model_dim)]
             The queries.
-            
-        k : Tensor [shape=(batch_size, time_steps_k, model_dim)] 
+        k : Tensor [shape=(batch_size, time_steps_k, model_dim)]
             The keys.
-            
-        v : Tensor [shape=(batch_size, time_steps_k, model_dim)] 
+        v : Tensor [shape=(batch_size, time_steps_k, model_dim)]
             The values.
-            
         mask : Tensor [shape=(batch_size, times_steps_q, time_steps_k] or broadcastable shape
             The mask.
 
         Returns
         ----------
-        out : Tensor [shape=(batch_size, time_steps_q, model_dim)] 
+        out : Tensor [shape=(batch_size, time_steps_q, model_dim)]
             The context vector.
-            
         attention_weights : Tensor [shape=(batch_size, times_steps_q, time_steps_k)]
             The attention weights.
         """
@@ -200,20 +182,16 @@ class MultiheadAttention(nn.Layer):
     -----------
     model_dim: int
         The feature size of query.
-        
     num_heads : int
         The number of attention heads.
-        
     dropout : float, optional
-        Dropout probability of scaled dot product attention and final context 
+        Dropout probability of scaled dot product attention and final context
         vector. Defaults to 0.0.
-        
     k_dim : int, optional
-        Feature size of the key of each scaled dot product attention. If not 
+        Feature size of the key of each scaled dot product attention. If not
         provided, it is set to ``model_dim / num_heads``. Defaults to None.
-        
     v_dim : int, optional
-        Feature size of the key of each scaled dot product attention. If not 
+        Feature size of the value of each scaled dot product attention. If not
         provided, it is set to ``model_dim / num_heads``. Defaults to None.
 
     Raises
@@ -248,23 +226,19 @@ class MultiheadAttention(nn.Layer):
         
         Parameters
         -----------
-        q : Tensor [shape=(batch_size, time_steps_q, model_dim)] 
+        q : Tensor [shape=(batch_size, time_steps_q, model_dim)]
             The queries.
-            
-        k : Tensor [shape=(batch_size, time_steps_k, model_dim)] 
+        k : Tensor [shape=(batch_size, time_steps_k, model_dim)]
             The keys.
-            
-        v : Tensor [shape=(batch_size, time_steps_k, model_dim)] 
+        v : Tensor [shape=(batch_size, time_steps_k, model_dim)]
             The values.
-            
         mask : Tensor [shape=(batch_size, times_steps_q, time_steps_k] or broadcastable shape
             The mask.
 
         Returns
         ----------
-        out : Tensor [shape=(batch_size, time_steps_q, model_dim)] 
+        out : Tensor [shape=(batch_size, time_steps_q, model_dim)]
             The context vector.
-            
         attention_weights : Tensor [shape=(batch_size, times_steps_q, time_steps_k)]
             The attention weights.
         """
@@ -290,16 +264,12 @@ class LocationSensitiveAttention(nn.Layer):
     -----------
     d_query: int
         The feature size of query.
-        
     d_key : int
         The feature size of key.
-        
     d_attention : int
-        The feature size of dimension. 
-        
+        The feature size of dimension.
     location_filters : int
         Filter size of attention convolution.
-        
     location_kernel_size : int
         Kernel size of attention convolution.
     """
@@ -337,27 +307,22 @@ class LocationSensitiveAttention(nn.Layer):
         
         Parameters
         -----------
-        query : Tensor [shape=(batch_size, d_query)] 
+        query : Tensor [shape=(batch_size, d_query)]
             The queries.
-            
-        processed_key : Tensor [shape=(batch_size, time_steps_k, d_attention)] 
+        processed_key : Tensor [shape=(batch_size, time_steps_k, d_attention)]
             The keys after linear layer.
-            
-        value : Tensor [shape=(batch_size, time_steps_k, d_key)] 
+        value : Tensor [shape=(batch_size, time_steps_k, d_key)]
             The values.
-
         attention_weights_cat : Tensor [shape=(batch_size, time_step_k, 2)]
             Attention weights concat.
-            
         mask : Tensor, optional
             The mask. Shape should be (batch_size, times_steps_k, 1).
             Defaults to None.
 
         Returns
         ----------
-        attention_context : Tensor [shape=(batch_size, d_attention)] 
+        attention_context : Tensor [shape=(batch_size, d_attention)]
             The context vector.
-            
         attention_weights : Tensor [shape=(batch_size, time_steps_k)]
             The attention weights.
         """
diff --git a/parakeet/modules/audio.py b/parakeet/modules/audio.py
index c44aa66..926ce8f 100644
--- a/parakeet/modules/audio.py
+++ b/parakeet/modules/audio.py
@@ -11,20 +11,19 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import librosa
+import numpy as np
 import paddle
+from librosa.util import pad_center
 from paddle import nn
 from paddle.nn import functional as F
 from scipy import signal
-import librosa
-from librosa.util import pad_center
-import numpy as np
 
 __all__ = ["quantize", "dequantize", "STFT", "MelScale"]
 
 
 def quantize(values, n_bands):
-    """Linearlly quantize a float Tensor in [-1, 1) to an interger Tensor in 
+    """Linearly quantize a float Tensor in [-1, 1) to an integer Tensor in
     [0, n_bands).
 
     Parameters
@@ -33,7 +32,7 @@ def quantize(values, n_bands):
         The floating point value.
         
     n_bands : int
-        The number of bands. The output integer Tensor's value is in the range 
+        The number of bands. The output integer Tensor's value is in the range
         [0, n_bans).
 
     Returns
@@ -46,7 +45,7 @@ def quantize(values, n_bands):
 
 
 def dequantize(quantized, n_bands, dtype=None):
-    """Linearlly dequantize an integer Tensor into a float Tensor in the range 
+    """Linearly dequantize an integer Tensor into a float Tensor in the range
     [-1, 1).
 
     Parameters
@@ -55,7 +54,7 @@ def dequantize(quantized, n_bands, dtype=None):
         The quantized value in the range [0, n_bands).
         
     n_bands : int
-        Number of bands. The input integer Tensor's value is in the range 
+        Number of bands. The input integer Tensor's value is in the range
         [0, n_bans).
         
     dtype : str, optional
@@ -73,43 +72,36 @@ def dequantize(quantized, n_bands, dtype=None):
 
 
 class STFT(nn.Layer):
-    """A module for computing stft transformation in a differentiable way. 
+    """A module for computing stft transformation in a differentiable way.
     
     Parameters
     ------------
     n_fft : int
         Number of samples in a frame.
-        
     hop_length : int
         Number of samples shifted between adjacent frames.
-        
     win_length : int
         Length of the window.
-        
     window : str, optional
-        Name of window function, see `scipy.signal.get_window` for more 
+        Name of window function, see `scipy.signal.get_window` for more
         details. Defaults to "hanning".
-        
     center : bool
         If True, the signal y is padded so that frame D[:, t] is centered 
         at y[t * hop_length]. If False, then D[:, t] begins at y[t * hop_length].
         Defaults to True.
-    
     pad_mode : string or function
-        If center=True, this argument is passed to np.pad for padding the edges 
-        of the signal y. By default (pad_mode="reflect"), y is padded on both 
-        sides with its own reflection, mirrored around its first and last 
+        If center=True, this argument is passed to np.pad for padding the edges
+        of the signal y. By default (pad_mode="reflect"), y is padded on both
+        sides with its own reflection, mirrored around its first and last
         sample respectively. If center=False, this argument is ignored.
-    
-        
         
     Notes
     -----------
-    It behaves like ``librosa.core.stft``. See ``librosa.core.stft`` for more 
+    It behaves like ``librosa.core.stft``. See ``librosa.core.stft`` for more
     details.
     
-    Given a audio which ``T`` samples, it the STFT transformation outputs a 
-    spectrum with (C, frames) and complex dtype, where ``C = 1 + n_fft / 2`` 
+    Given an audio with ``T`` samples, the STFT transformation outputs a
+    spectrum with (C, frames) and complex dtype, where ``C = 1 + n_fft / 2``
     and ``frames = 1 + T // hop_lenghth``.
     
     Ony ``center`` and ``reflect`` padding is supported now.
@@ -144,19 +136,19 @@ class STFT(nn.Layer):
         # pad window to n_fft size
         if n_fft != win_length:
             window = pad_center(window, n_fft, mode="constant")
-            #lpad = (n_fft - win_length) // 2
-            #rpad = n_fft - win_length - lpad
-            #window = np.pad(window, ((lpad, pad), ), 'constant')
+            # lpad = (n_fft - win_length) // 2
+            # rpad = n_fft - win_length - lpad
+            # window = np.pad(window, ((lpad, pad), ), 'constant')
 
         # calculate weights
-        #r = np.arange(0, n_fft)
-        #M = np.expand_dims(r, -1) * np.expand_dims(r, 0)
-        #w_real = np.reshape(window *
-        #np.cos(2 * np.pi * M / n_fft)[:self.n_bin],
-        #(self.n_bin, 1, self.n_fft))
-        #w_imag = np.reshape(window *
-        #np.sin(-2 * np.pi * M / n_fft)[:self.n_bin],
-        #(self.n_bin, 1, self.n_fft))
+        # r = np.arange(0, n_fft)
+        # M = np.expand_dims(r, -1) * np.expand_dims(r, 0)
+        # w_real = np.reshape(window *
+        # np.cos(2 * np.pi * M / n_fft)[:self.n_bin],
+        # (self.n_bin, 1, self.n_fft))
+        # w_imag = np.reshape(window *
+        # np.sin(-2 * np.pi * M / n_fft)[:self.n_bin],
+        # (self.n_bin, 1, self.n_fft))
         weight = np.fft.fft(np.eye(n_fft))[:self.n_bin]
         w_real = weight.real
         w_imag = weight.imag
@@ -174,17 +166,18 @@ class STFT(nn.Layer):
             The input waveform.
         Returns
         ------------
-        real : Tensor [shape=(B, C, frames)] 
+        real : Tensor [shape=(B, C, frames)]
             The real part of the spectrogram.
             
-        imag : Tensor [shape=(B, C, frames)] 
+        imag : Tensor [shape=(B, C, frames)]
             The image part of the spectrogram.
         """
         x = paddle.unsqueeze(x, axis=1)
         if self.center:
-            x = F.pad(x, [self.n_fft // 2, self.n_fft // 2],
-                      data_format='NCL',
-                      mode=self.pad_mode)
+            x = F.pad(
+                x, [self.n_fft // 2, self.n_fft // 2],
+                data_format='NCL',
+                mode=self.pad_mode)
 
         # to BCT, C=1
         out = F.conv1d(x, self.weight, stride=self.hop_length)
@@ -199,7 +192,7 @@ class STFT(nn.Layer):
             The input waveform.
         Returns
         ------------
-        Tensor [shape=(B, C, T)] 
+        Tensor [shape=(B, C, T)]
             The power spectrum.
         """
         real, imag = self.forward(x)
@@ -214,7 +207,7 @@ class STFT(nn.Layer):
             The input waveform.
         Returns
         ------------
-        Tensor [shape=(B, C, T)] 
+        Tensor [shape=(B, C, T)]
             The magnitude of the spectrum.
         """
         power = self.power(x)
diff --git a/parakeet/modules/conv.py b/parakeet/modules/conv.py
index d984605..d9bd98d 100644
--- a/parakeet/modules/conv.py
+++ b/parakeet/modules/conv.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import paddle
 from paddle import nn
 
@@ -22,48 +21,40 @@ __all__ = [
 
 
 class Conv1dCell(nn.Conv1D):
-    """A subclass of Conv1D layer, which can be used in an autoregressive 
+    """A subclass of Conv1D layer, which can be used in an autoregressive
     decoder like an RNN cell. 
     
-    When used in autoregressive decoding, it performs causal temporal 
-    convolution incrementally. At each time step, it takes a step input and 
-    returns a step output. 
+    When used in autoregressive decoding, it performs causal temporal
+    convolution incrementally. At each time step, it takes a step input and
+    returns a step output.
     
     Notes
     ------
-    It is done by caching an internal buffer of length ``receptive_file - 1``. 
-    when adding a step input, the buffer is shited by one step, the latest 
-    input is added to be buffer and the oldest step is discarded. And it 
-    returns a step output. For single step case, convolution is equivalent to a 
+    It is done by caching an internal buffer of length ``receptive_field - 1``.
+    When adding a step input, the buffer is shifted by one step, the latest
+    input is added to the buffer and the oldest step is discarded. And it
+    returns a step output. For single step case, convolution is equivalent to a
     linear transformation.
-    
     That it can be used as a cell depends on several restrictions:
-    
     1. stride must be 1;
     2. padding must be a causal padding (recpetive_field - 1, 0).
-    
-    Thus, these arguments are removed from the ``__init__`` method of this 
+    Thus, these arguments are removed from the ``__init__`` method of this
     class.
     
     Parameters
     ----------
     in_channels: int
         The feature size of the input.
-        
     out_channels: int
         The feature size of the output.
-        
     kernel_size: int or Tuple[int]
         The size of the kernel.
-        
     dilation: int or Tuple[int]
         The dilation of the convolution, by default 1
-        
     weight_attr: ParamAttr, Initializer, str or bool, optional
         The parameter attribute of the convolution kernel, by default None.
-        
     bias_attr: ParamAttr, Initializer, str or bool, optional
-        The parameter attribute of the bias. If ``False``, this layer does not 
+        The parameter attribute of the bias. If ``False``, this layer does not
         have a bias, by default None.
         
     Examples
@@ -114,7 +105,7 @@ class Conv1dCell(nn.Conv1D):
         
         Warnings
         ---------
-        This method should be called before a sequence of calls to 
+        This method should be called before a sequence of calls to
         ``add_input``.
 
         Raises
@@ -165,12 +156,12 @@ class Conv1dCell(nn.Conv1D):
         
         Parameters
         -----------
-        x_t : Tensor [shape=(batch_size, in_channels)] 
+        x_t : Tensor [shape=(batch_size, in_channels)]
             The step input.
             
         Returns
         -------
-        y_t :Tensor [shape=(batch_size, out_channels)] 
+        y_t :Tensor [shape=(batch_size, out_channels)]
             The step output.
         """
         batch_size = x_t.shape[0]
@@ -199,36 +190,27 @@ class Conv1dBatchNorm(nn.Layer):
     ----------
     in_channels : int
         The feature size of the input.
-        
     out_channels : int
         The feature size of the output.
-        
     kernel_size : int
         The size of the convolution kernel.
-        
     stride : int, optional
         The stride of the convolution, by default 1.
-        
     padding : int, str or Tuple[int], optional
-        The padding of the convolution. 
+        The padding of the convolution.
         If int, a symmetrical padding is applied before convolution;
         If str, it should be "same" or "valid";
-        If Tuple[int], its length should be 2, meaning 
+        If Tuple[int], its length should be 2, meaning
         ``(pad_before, pad_after)``, by default 0.
-        
     weight_attr : ParamAttr, Initializer, str or bool, optional
         The parameter attribute of the convolution kernel, by default None.
-        
     bias_attr : ParamAttr, Initializer, str or bool, optional
-        The parameter attribute of the bias of the convolution, by default 
+        The parameter attribute of the bias of the convolution, by default
         None.
-        
     data_format : str ["NCL" or "NLC"], optional
         The data layout of the input, by default "NCL"
-        
     momentum : float, optional
         The momentum of the BatchNorm1D layer, by default 0.9
-        
     epsilon : [type], optional
         The epsilon of the BatchNorm1D layer, by default 1e-05
     """
diff --git a/parakeet/modules/expansion.py b/parakeet/modules/expansion.py
index d136ada..e9d4b6f 100644
--- a/parakeet/modules/expansion.py
+++ b/parakeet/modules/expansion.py
@@ -11,9 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import numpy as np
-
 import paddle
 from paddle import Tensor
 
diff --git a/parakeet/modules/fastspeech2_predictor/duration_predictor.py b/parakeet/modules/fastspeech2_predictor/duration_predictor.py
index 10e3f38..d0f5262 100644
--- a/parakeet/modules/fastspeech2_predictor/duration_predictor.py
+++ b/parakeet/modules/fastspeech2_predictor/duration_predictor.py
@@ -12,9 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Duration predictor related modules."""
-
 import paddle
 from paddle import nn
+
 from parakeet.modules.layer_norm import LayerNorm
 from parakeet.modules.masked_fill import masked_fill
 
@@ -78,8 +78,7 @@ class DurationPredictor(nn.Layer):
                         stride=1,
                         padding=(kernel_size - 1) // 2, ),
                     nn.ReLU(),
-                    LayerNorm(
-                        n_chans, dim=1),
+                    LayerNorm(n_chans, dim=1),
                     nn.Dropout(dropout_rate), ))
         self.linear = nn.Linear(n_chans, 1, bias_attr=True)
 
diff --git a/parakeet/modules/fastspeech2_predictor/length_regulator.py b/parakeet/modules/fastspeech2_predictor/length_regulator.py
index 0e6233c..86f9ebb 100644
--- a/parakeet/modules/fastspeech2_predictor/length_regulator.py
+++ b/parakeet/modules/fastspeech2_predictor/length_regulator.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Length regulator related modules."""
-
 import numpy as np
 import paddle
 from paddle import nn
diff --git a/parakeet/modules/fastspeech2_predictor/postnet.py b/parakeet/modules/fastspeech2_predictor/postnet.py
index 50b849e..885ecda 100644
--- a/parakeet/modules/fastspeech2_predictor/postnet.py
+++ b/parakeet/modules/fastspeech2_predictor/postnet.py
@@ -11,9 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import six
-import paddle
 from paddle import nn
 
 
diff --git a/parakeet/modules/fastspeech2_predictor/variance_predictor.py b/parakeet/modules/fastspeech2_predictor/variance_predictor.py
index 92136a2..0a980dd 100644
--- a/parakeet/modules/fastspeech2_predictor/variance_predictor.py
+++ b/parakeet/modules/fastspeech2_predictor/variance_predictor.py
@@ -12,12 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Variance predictor related modules."""
-
 import paddle
 from paddle import nn
+from typeguard import check_argument_types
+
 from parakeet.modules.layer_norm import LayerNorm
 from parakeet.modules.masked_fill import masked_fill
-from typeguard import check_argument_types
 
 
 class VariancePredictor(nn.Layer):
@@ -69,8 +69,7 @@ class VariancePredictor(nn.Layer):
                         padding=(kernel_size - 1) // 2,
                         bias_attr=True, ),
                     nn.ReLU(),
-                    LayerNorm(
-                        n_chans, dim=1),
+                    LayerNorm(n_chans, dim=1),
                     nn.Dropout(dropout_rate), ))
 
         self.linear = nn.Linear(n_chans, 1, bias_attr=True)
diff --git a/parakeet/modules/fastspeech2_transformer/attention.py b/parakeet/modules/fastspeech2_transformer/attention.py
index 9cb6001..ae941a7 100644
--- a/parakeet/modules/fastspeech2_transformer/attention.py
+++ b/parakeet/modules/fastspeech2_transformer/attention.py
@@ -12,12 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Multi-Head Attention layer definition."""
-
 import math
 
 import numpy
 import paddle
 from paddle import nn
+
 from parakeet.modules.masked_fill import masked_fill
 
 
@@ -111,8 +111,7 @@ class MultiHeadedAttention(nn.Layer):
             mask = paddle.logical_not(mask)
             min_value = float(
                 numpy.finfo(
-                    paddle.to_tensor(
-                        0, dtype=scores.dtype).numpy().dtype).min)
+                    paddle.to_tensor(0, dtype=scores.dtype).numpy().dtype).min)
 
             scores = masked_fill(scores, mask, min_value)
             # (batch, head, time1, time2)
diff --git a/parakeet/modules/fastspeech2_transformer/embedding.py b/parakeet/modules/fastspeech2_transformer/embedding.py
index 9767193..71160a6 100644
--- a/parakeet/modules/fastspeech2_transformer/embedding.py
+++ b/parakeet/modules/fastspeech2_transformer/embedding.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Positional Encoding Module."""
-
 import math
 
 import paddle
@@ -55,9 +54,8 @@ class PositionalEncoding(nn.Layer):
             position = paddle.arange(
                 0, x.shape[1], dtype=paddle.float32).unsqueeze(1)
         div_term = paddle.exp(
-            paddle.arange(
-                0, self.d_model, 2,
-                dtype=paddle.float32) * -(math.log(10000.0) / self.d_model))
+            paddle.arange(0, self.d_model, 2, dtype=paddle.float32) *
+            -(math.log(10000.0) / self.d_model))
         pe[:, 0::2] = paddle.sin(position * div_term)
         pe[:, 1::2] = paddle.cos(position * div_term)
         pe = pe.unsqueeze(0)
diff --git a/parakeet/modules/fastspeech2_transformer/encoder.py b/parakeet/modules/fastspeech2_transformer/encoder.py
index 84a6142..630b50f 100644
--- a/parakeet/modules/fastspeech2_transformer/encoder.py
+++ b/parakeet/modules/fastspeech2_transformer/encoder.py
@@ -11,16 +11,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import logging
 
 from paddle import nn
-from parakeet.modules.fastspeech2_transformer.embedding import PositionalEncoding
+
 from parakeet.modules.fastspeech2_transformer.attention import MultiHeadedAttention
+from parakeet.modules.fastspeech2_transformer.embedding import PositionalEncoding
+from parakeet.modules.fastspeech2_transformer.encoder_layer import EncoderLayer
 from parakeet.modules.fastspeech2_transformer.multi_layer_conv import Conv1dLinear
 from parakeet.modules.fastspeech2_transformer.multi_layer_conv import MultiLayeredConv1d
 from parakeet.modules.fastspeech2_transformer.positionwise_feed_forward import PositionwiseFeedForward
-from parakeet.modules.fastspeech2_transformer.encoder_layer import EncoderLayer
 from parakeet.modules.fastspeech2_transformer.repeat import repeat
 
 
@@ -90,16 +90,14 @@ class Encoder(nn.Layer):
         self.conv_subsampling_factor = 1
         if input_layer == "linear":
             self.embed = nn.Sequential(
-                nn.Linear(
-                    idim, attention_dim, bias_attr=True),
+                nn.Linear(idim, attention_dim, bias_attr=True),
                 nn.LayerNorm(attention_dim),
                 nn.Dropout(dropout_rate),
                 nn.ReLU(),
                 pos_enc_class(attention_dim, positional_dropout_rate), )
         elif input_layer == "embed":
             self.embed = nn.Sequential(
-                nn.Embedding(
-                    idim, attention_dim, padding_idx=padding_idx),
+                nn.Embedding(idim, attention_dim, padding_idx=padding_idx),
                 pos_enc_class(attention_dim, positional_dropout_rate), )
         elif isinstance(input_layer, nn.Layer):
             self.embed = nn.Sequential(
@@ -125,10 +123,9 @@ class Encoder(nn.Layer):
         ]:
             logging.info("encoder self-attention layer type = self-attention")
             encoder_selfattn_layer = MultiHeadedAttention
-            encoder_selfattn_layer_args = [(
-                attention_heads,
-                attention_dim,
-                attention_dropout_rate, )] * num_blocks
+            encoder_selfattn_layer_args = [
+                (attention_heads, attention_dim, attention_dropout_rate, )
+            ] * num_blocks
 
         else:
             raise NotImplementedError(selfattention_layer_type)
@@ -159,18 +156,14 @@ class Encoder(nn.Layer):
                                        dropout_rate)
         elif positionwise_layer_type == "conv1d":
             positionwise_layer = MultiLayeredConv1d
-            positionwise_layer_args = (
-                attention_dim,
-                linear_units,
-                positionwise_conv_kernel_size,
-                dropout_rate, )
+            positionwise_layer_args = (attention_dim, linear_units,
+                                       positionwise_conv_kernel_size,
+                                       dropout_rate, )
         elif positionwise_layer_type == "conv1d-linear":
             positionwise_layer = Conv1dLinear
-            positionwise_layer_args = (
-                attention_dim,
-                linear_units,
-                positionwise_conv_kernel_size,
-                dropout_rate, )
+            positionwise_layer_args = (attention_dim, linear_units,
+                                       positionwise_conv_kernel_size,
+                                       dropout_rate, )
         else:
             raise NotImplementedError("Support only linear or conv1d.")
         return positionwise_layer, positionwise_layer_args
@@ -229,4 +222,4 @@ class Encoder(nn.Layer):
             new_cache.append(xs)
         if self.normalize_before:
             xs = self.after_norm(xs)
-        return xs, masks, new_cache
\ No newline at end of file
+        return xs, masks, new_cache
diff --git a/parakeet/modules/fastspeech2_transformer/encoder_layer.py b/parakeet/modules/fastspeech2_transformer/encoder_layer.py
index 00d551e..d8f89d6 100644
--- a/parakeet/modules/fastspeech2_transformer/encoder_layer.py
+++ b/parakeet/modules/fastspeech2_transformer/encoder_layer.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Encoder self-attention layer definition."""
-
 import paddle
 from paddle import nn
 
diff --git a/parakeet/modules/fastspeech2_transformer/multi_layer_conv.py b/parakeet/modules/fastspeech2_transformer/multi_layer_conv.py
index 273d8d0..8845b2a 100644
--- a/parakeet/modules/fastspeech2_transformer/multi_layer_conv.py
+++ b/parakeet/modules/fastspeech2_transformer/multi_layer_conv.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Layer modules for FFT block in FastSpeech (Feed-forward Transformer)."""
-
 import paddle
 
 
diff --git a/parakeet/modules/fastspeech2_transformer/positionwise_feed_forward.py b/parakeet/modules/fastspeech2_transformer/positionwise_feed_forward.py
index c57fba6..39c06eb 100644
--- a/parakeet/modules/fastspeech2_transformer/positionwise_feed_forward.py
+++ b/parakeet/modules/fastspeech2_transformer/positionwise_feed_forward.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Positionwise feed forward layer definition."""
-
 import paddle
 
 
diff --git a/parakeet/modules/fastspeech2_transformer/repeat.py b/parakeet/modules/fastspeech2_transformer/repeat.py
index 250a3a4..3c62298 100644
--- a/parakeet/modules/fastspeech2_transformer/repeat.py
+++ b/parakeet/modules/fastspeech2_transformer/repeat.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Repeat the same layer definition."""
-
 import paddle
 
 
diff --git a/parakeet/modules/geometry.py b/parakeet/modules/geometry.py
index 05a5931..a3d56f7 100644
--- a/parakeet/modules/geometry.py
+++ b/parakeet/modules/geometry.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import numpy as np
 import paddle
 
@@ -23,15 +22,13 @@ def shuffle_dim(x, axis, perm=None):
     ----------
     x : Tensor
         The input tensor.
-        
     axis : int
         The axis to shuffle.
-        
     perm : List[int], ndarray, optional
         The order to reorder the tensor along the ``axis``-th dimension.
         
-        It is a permutation of ``[0, d)``, where d is the size of the 
-        ``axis``-th dimension of the input tensor. If not provided, 
+        It is a permutation of ``[0, d)``, where d is the size of the
+        ``axis``-th dimension of the input tensor. If not provided,
         a random permutation is used. Defaults to None.
 
     Returns
diff --git a/parakeet/modules/layer_norm.py b/parakeet/modules/layer_norm.py
index 2ff91b8..3bab823 100644
--- a/parakeet/modules/layer_norm.py
+++ b/parakeet/modules/layer_norm.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Layer normalization module."""
-
 import paddle
 
 
@@ -55,8 +54,9 @@ class LayerNorm(paddle.nn.LayerNorm):
 
             orig_perm = list(range(len_dim))
             new_perm = orig_perm[:]
-            new_perm[self.dim], new_perm[len_dim - 1] = new_perm[
-                len_dim - 1], new_perm[self.dim]
+            new_perm[self.dim], new_perm[len_dim -
+                                         1] = new_perm[len_dim -
+                                                       1], new_perm[self.dim]
 
             return paddle.transpose(
                 super(LayerNorm, self).forward(paddle.transpose(x, new_perm)),
diff --git a/parakeet/modules/losses.py b/parakeet/modules/losses.py
index 9c34cc7..ece9e04 100644
--- a/parakeet/modules/losses.py
+++ b/parakeet/modules/losses.py
@@ -11,13 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-import numba
-import numpy as np
 import paddle
-from paddle import nn
-from paddle.nn import functional as F
 from paddle.fluid.layers import sequence_mask
+from paddle.nn import functional as F
 
 __all__ = [
     "guided_attention_loss",
@@ -30,7 +26,7 @@ __all__ = [
 def attention_guide(dec_lens, enc_lens, N, T, g, dtype=None):
     """Build that W matrix. shape(B, T_dec, T_enc)
     W[i, n, t] = 1 - exp(-(n/dec_lens[i] - t/enc_lens[i])**2 / (2g**2)) 
-    
+
     See also:
     Tachibana, Hideyuki, Katsuya Uenoyama, and Shunsuke Aihara. 2017. “Efficiently Trainable Text-to-Speech System Based on Deep Convolutional Networks with Guided Attention.” ArXiv:1710.08969 [Cs, Eess], October. http://arxiv.org/abs/1710.08969.
     """
@@ -88,12 +84,10 @@ def masked_l1_loss(prediction, target, mask):
     ----------
     prediction : Tensor
         The prediction.
-        
     target : Tensor
         The target. The shape should be broadcastable to ``prediction``.
-        
     mask : Tensor
-        The mask. The shape should be broadcatable to the broadcasted shape of 
+        The mask. The shape should be broadcastable to the broadcasted shape of
         ``prediction`` and ``target``.
 
     Returns
@@ -113,13 +107,10 @@ def masked_softmax_with_cross_entropy(logits, label, mask, axis=-1):
     ----------
     logits : Tensor
         The logits. The ``axis``-th axis is the class dimension.
-        
     label : Tensor [dtype: int]
         The label. The size of the ``axis``-th axis should be 1.
-        
     mask : Tensor 
         The mask. The shape should be broadcastable to ``label``.
-        
     axis : int, optional
         The index of the class dimension in the shape of ``logits``, by default
         -1.
diff --git a/parakeet/modules/masked_fill.py b/parakeet/modules/masked_fill.py
index 4ca9826..34230f1 100644
--- a/parakeet/modules/masked_fill.py
+++ b/parakeet/modules/masked_fill.py
@@ -11,9 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from typing import Union
 
 import paddle
-from typing import Union
 
 
 def is_broadcastable(shp1, shp2):
@@ -34,4 +34,4 @@ def masked_fill(xs: paddle.Tensor,
     trues = paddle.ones_like(xs) * value
     mask = mask.cast(dtype=paddle.bool)
     xs = paddle.where(mask, trues, xs)
-    return xs
\ No newline at end of file
+    return xs
diff --git a/parakeet/modules/masking.py b/parakeet/modules/masking.py
index 96871a9..7cf3704 100644
--- a/parakeet/modules/masking.py
+++ b/parakeet/modules/masking.py
@@ -11,9 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import paddle
-from paddle.fluid.layers import sequence_mask
 
 __all__ = [
     "id_mask",
@@ -25,7 +23,7 @@ __all__ = [
 
 def id_mask(input, padding_index=0, dtype="bool"):
     """Generate mask with input ids. 
-    
+
     Those positions where the value equals ``padding_index`` correspond to 0 or
     ``False``, otherwise, 1 or ``True``.
 
@@ -33,10 +31,8 @@ def id_mask(input, padding_index=0, dtype="bool"):
     ----------
     input : Tensor [dtype: int]
         The input tensor. It represents the ids.
-        
     padding_index : int, optional
         The id which represents padding, by default 0.
-        
     dtype : str, optional
         Data type of the returned mask, by default "bool".
 
@@ -50,7 +46,7 @@ def id_mask(input, padding_index=0, dtype="bool"):
 
 def feature_mask(input, axis, dtype="bool"):
     """Compute mask from input features.
-    
+
     For a input features, represented as batched feature vectors, those vectors
     which all zeros are considerd padding vectors.
 
@@ -58,19 +54,16 @@ def feature_mask(input, axis, dtype="bool"):
     ----------
     input : Tensor [dtype: float]
         The input tensor which represents featues.
-        
     axis : int
         The index of the feature dimension in ``input``. Other dimensions are
         considered ``spatial`` dimensions.
-        
     dtype : str, optional
         Data type of the generated mask, by default "bool"
-
     Returns
     -------
     Tensor
         The geenrated mask with ``spatial`` shape as mentioned above.
-        
+
         It has one less dimension than ``input`` does.
     """
     feature_sum = paddle.sum(paddle.abs(input), axis)
@@ -83,22 +76,20 @@ def combine_mask(mask1, mask2):
     Parameters
     -----------
     mask1 : Tensor
-        The first mask. 
-        
+        The first mask.
     mask2 : Tensor
         The second mask with broadcastable shape with ``mask1``.
-        
     Returns
     --------
     Tensor
         Combined mask.
-        
+
     Notes
     ------
-    It is mainly used to combine the padding mask and no future mask for 
+    It is mainly used to combine the padding mask and no future mask for
     transformer decoder. 
-    
-    Padding mask is used to mask padding positions of the decoder inputs and 
+
+    Padding mask is used to mask padding positions of the decoder inputs and
     no future mask is used to prevent the decoder to see future information.
     """
     if mask1.dtype == paddle.fluid.core.VarDesc.VarType.BOOL:
@@ -109,8 +100,8 @@ def combine_mask(mask1, mask2):
 
 def future_mask(time_steps, dtype="bool"):
     """Generate lower triangular mask.
-    
-    It is used at transformer decoder to prevent the decoder to see future 
+
+    It is used at transformer decoder to prevent the decoder from seeing future
     information.
 
     Parameters
diff --git a/parakeet/modules/nets_utils.py b/parakeet/modules/nets_utils.py
index 5997873..47eae65 100644
--- a/parakeet/modules/nets_utils.py
+++ b/parakeet/modules/nets_utils.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import paddle
 from paddle import nn
 from typeguard import check_argument_types
diff --git a/parakeet/modules/normalizer.py b/parakeet/modules/normalizer.py
index 176741b..a4fc598 100644
--- a/parakeet/modules/normalizer.py
+++ b/parakeet/modules/normalizer.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import paddle
 from paddle import nn
 
diff --git a/parakeet/modules/positional_encoding.py b/parakeet/modules/positional_encoding.py
index 919af10..7c368c3 100644
--- a/parakeet/modules/positional_encoding.py
+++ b/parakeet/modules/positional_encoding.py
@@ -11,13 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-import math
-import numpy as np
-
 import paddle
 from paddle import Tensor
-from paddle.nn import functional as F
 
 __all__ = ["sinusoid_position_encoding", "scaled_position_encoding"]
 
diff --git a/parakeet/modules/ssim.py b/parakeet/modules/ssim.py
index 3e4b20d..c9899cd 100644
--- a/parakeet/modules/ssim.py
+++ b/parakeet/modules/ssim.py
@@ -11,13 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 from math import exp
 
-import numpy as np
 import paddle
-from paddle import nn
 import paddle.nn.functional as F
+from paddle import nn
 
 
 def gaussian(window_size, sigma):
@@ -30,9 +28,8 @@ def gaussian(window_size, sigma):
 
 def create_window(window_size, channel):
     _1D_window = gaussian(window_size, 1.5).unsqueeze(1)
-    _2D_window = paddle.matmul(_1D_window,
-                               paddle.transpose(_1D_window,
-                                                [1, 0])).unsqueeze([0, 1])
+    _2D_window = paddle.matmul(_1D_window, paddle.transpose(
+        _1D_window, [1, 0])).unsqueeze([0, 1])
     window = paddle.expand(_2D_window, [channel, 1, window_size, window_size])
     return window
 
@@ -50,8 +47,7 @@ def _ssim(img1, img2, window, window_size, channel, size_average=True):
     sigma2_sq = F.conv2d(
         img2 * img2, window, padding=window_size // 2, groups=channel) - mu2_sq
     sigma12 = F.conv2d(
-        img1 * img2, window, padding=window_size // 2,
-        groups=channel) - mu1_mu2
+        img1 * img2, window, padding=window_size // 2, groups=channel) - mu1_mu2
 
     C1 = 0.01**2
     C2 = 0.03**2
@@ -81,4 +77,4 @@ class SSIM(nn.Layer):
 def ssim(img1, img2, window_size=11, size_average=True):
     (_, channel, _, _) = img1.shape
     window = create_window(window_size, channel)
-    return _ssim(img1, img2, window, window_size, channel, size_average)
\ No newline at end of file
+    return _ssim(img1, img2, window, window_size, channel, size_average)
diff --git a/parakeet/modules/stft_loss.py b/parakeet/modules/stft_loss.py
index 7c3779c..16382d6 100644
--- a/parakeet/modules/stft_loss.py
+++ b/parakeet/modules/stft_loss.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import paddle
 from paddle import nn
 from paddle.nn import functional as F
@@ -28,16 +27,20 @@ class SpectralConvergenceLoss(nn.Layer):
 
     def forward(self, x_mag, y_mag):
         """Calculate forward propagation.
-        Args:
-            x_mag (Tensor): Magnitude spectrogram of predicted signal (B, #frames, #freq_bins).
-            y_mag (Tensor): Magnitude spectrogram of groundtruth signal (B, #frames, #freq_bins).
-        Returns:
-            Tensor: Spectral convergence loss value.
+        Parameters
+        ----------
+        x_mag : Tensor
+            Magnitude spectrogram of predicted signal (B, #frames, #freq_bins).
+        y_mag : Tensor
+            Magnitude spectrogram of groundtruth signal (B, #frames, #freq_bins).
+        Returns
+        ----------
+        Tensor
+            Spectral convergence loss value.
         """
         return paddle.norm(
             y_mag - x_mag, p="fro") / paddle.clip(
-                paddle.norm(
-                    y_mag, p="fro"), min=1e-10)
+                paddle.norm(y_mag, p="fro"), min=1e-10)
 
 
 class LogSTFTMagnitudeLoss(nn.Layer):
@@ -62,10 +65,8 @@ class LogSTFTMagnitudeLoss(nn.Layer):
             Log STFT magnitude loss value.
         """
         return F.l1_loss(
-            paddle.log(paddle.clip(
-                y_mag, min=self.epsilon)),
-            paddle.log(paddle.clip(
-                x_mag, min=self.epsilon)))
+            paddle.log(paddle.clip(y_mag, min=self.epsilon)),
+            paddle.log(paddle.clip(x_mag, min=self.epsilon)))
 
 
 class STFTLoss(nn.Layer):
diff --git a/parakeet/modules/transformer.py b/parakeet/modules/transformer.py
index e857990..696b12b 100644
--- a/parakeet/modules/transformer.py
+++ b/parakeet/modules/transformer.py
@@ -11,14 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-import math
-import paddle
 from paddle import nn
-from paddle.nn import functional as F
-
 from parakeet.modules import attention as attn
-from parakeet.modules.masking import combine_mask
+from paddle.nn import functional as F
 
 __all__ = [
     "PositionwiseFFN",
@@ -31,18 +26,16 @@ class PositionwiseFFN(nn.Layer):
     """A faithful implementation of Position-wise Feed-Forward Network 
     in `Attention is All You Need <https://arxiv.org/abs/1706.03762>`_.
     It is basically a 2-layer MLP, with relu actication and dropout in between.
-    
+
     Parameters
     ----------
     input_size: int
-        The feature size of the intput. It is also the feature size of the 
+        The feature size of the input. It is also the feature size of the
         output.
-        
     hidden_size: int
         The hidden size.
-        
     dropout: float
-        The probability of the Dropout applied to the output of the first 
+        The probability of the Dropout applied to the output of the first
         layer, by default 0.
     """
 
@@ -74,30 +67,27 @@ class PositionwiseFFN(nn.Layer):
 
 
 class TransformerEncoderLayer(nn.Layer):
-    """A faithful implementation of Transformer encoder layer in 
+    """A faithful implementation of Transformer encoder layer in
     `Attention is All You Need <https://arxiv.org/abs/1706.03762>`_.
-    
+
     Parameters
     ----------
     d_model :int 
-        The feature size of the input. It is also the feature size of the 
+        The feature size of the input. It is also the feature size of the
         output.
-        
     n_heads : int
-        The number of heads of self attention (a ``MultiheadAttention`` 
+        The number of heads of self attention (a ``MultiheadAttention``
         layer).
-        
     d_ffn : int 
-        The hidden size of the positional feed forward network (a 
+        The hidden size of the positional feed forward network (a
         ``PositionwiseFFN`` layer).
-        
     dropout : float, optional
-        The probability of the dropout in MultiHeadAttention and 
+        The probability of the dropout in MultiHeadAttention and
         PositionwiseFFN, by default 0.
-        
+
     Notes
     ------
-    It uses the PostLN (post layer norm) scheme. 
+    It uses the PostLN (post layer norm) scheme.
     """
 
     def __init__(self, d_model, n_heads, d_ffn, dropout=0.):
@@ -112,60 +102,54 @@ class TransformerEncoderLayer(nn.Layer):
 
     def forward(self, x, mask):
         """Forward pass of TransformerEncoderLayer.
-        
+
         Parameters
         ----------
         x : Tensor [shape=(batch_size, time_steps, d_model)]
             The input.
-            
         mask : Tensor
-            The padding mask. The shape is (batch_size, time_steps, 
+            The padding mask. The shape is (batch_size, time_steps,
             time_steps) or broadcastable shape.
-        
+
         Returns
         -------
         x :Tensor [shape=(batch_size, time_steps, d_model)]
             The encoded output.
-            
+
         attn_weights : Tensor [shape=(batch_size, n_heads, time_steps, time_steps)]
             The attention weights of the self attention.
         """
         context_vector, attn_weights = self.self_mha(x, x, x, mask)
         x = self.layer_norm1(
-            F.dropout(
-                x + context_vector, self.dropout, training=self.training))
+            F.dropout(x + context_vector, self.dropout, training=self.training))
 
         x = self.layer_norm2(
-            F.dropout(
-                x + self.ffn(x), self.dropout, training=self.training))
+            F.dropout(x + self.ffn(x), self.dropout, training=self.training))
         return x, attn_weights
 
 
 class TransformerDecoderLayer(nn.Layer):
     """A faithful implementation of Transformer decoder layer in 
     `Attention is All You Need <https://arxiv.org/abs/1706.03762>`_.
-    
+
     Parameters
     ----------
     d_model :int 
-        The feature size of the input. It is also the feature size of the 
+        The feature size of the input. It is also the feature size of the
         output.
-        
     n_heads : int
-        The number of heads of attentions (``MultiheadAttention`` 
+        The number of heads of attentions (``MultiheadAttention``
         layers).
-        
     d_ffn : int 
-        The hidden size of the positional feed forward network (a 
+        The hidden size of the positional feed forward network (a
         ``PositionwiseFFN`` layer).
-        
     dropout : float, optional
-        The probability of the dropout in MultiHeadAttention and 
+        The probability of the dropout in MultiHeadAttention and
         PositionwiseFFN, by default 0.
-        
+
     Notes
     ------
-    It uses the PostLN (post layer norm) scheme. 
+    It uses the PostLN (post layer norm) scheme.
     """
 
     def __init__(self, d_model, n_heads, d_ffn, dropout=0.):
@@ -183,46 +167,41 @@ class TransformerDecoderLayer(nn.Layer):
 
     def forward(self, q, k, v, encoder_mask, decoder_mask):
         """Forward pass of TransformerEncoderLayer.
-        
+
         Parameters
         ----------
-        q : Tensor [shape=(batch_size, time_steps_q, d_model)] 
+        q : Tensor [shape=(batch_size, time_steps_q, d_model)]
             The decoder input.
-        k : Tensor [shape=(batch_size, time_steps_k, d_model)] 
+        k : Tensor [shape=(batch_size, time_steps_k, d_model)]
             The keys.
         v : Tensor [shape=(batch_size, time_steps_k, d_model)]
             The values
         encoder_mask : Tensor
-            Encoder padding mask, shape is ``(batch_size, time_steps_k, 
+            Encoder padding mask, shape is ``(batch_size, time_steps_k,
             time_steps_k)`` or broadcastable shape.
         decoder_mask : Tensor
             Decoder mask, shape is ``(batch_size, time_steps_q, time_steps_k)``
             or broadcastable shape. 
-        
+
         Returns
         --------
         q : Tensor [shape=(batch_size, time_steps_q, d_model)]
             The decoder output.
-            
         self_attn_weights : Tensor [shape=(batch_size, n_heads, time_steps_q, time_steps_q)]
             Decoder self attention.
-            
-        cross_attn_weights : Tensor [shape=(batch_size, n_heads, time_steps_q, time_steps_k)] 
+
+        cross_attn_weights : Tensor [shape=(batch_size, n_heads, time_steps_q, time_steps_k)]
             Decoder-encoder cross attention.
         """
-        context_vector, self_attn_weights = self.self_mha(q, q, q,
-                                                          decoder_mask)
+        context_vector, self_attn_weights = self.self_mha(q, q, q, decoder_mask)
         q = self.layer_norm1(
-            F.dropout(
-                q + context_vector, self.dropout, training=self.training))
+            F.dropout(q + context_vector, self.dropout, training=self.training))
 
         context_vector, cross_attn_weights = self.cross_mha(q, k, v,
                                                             encoder_mask)
         q = self.layer_norm2(
-            F.dropout(
-                q + context_vector, self.dropout, training=self.training))
+            F.dropout(q + context_vector, self.dropout, training=self.training))
 
         q = self.layer_norm3(
-            F.dropout(
-                q + self.ffn(q), self.dropout, training=self.training))
+            F.dropout(q + self.ffn(q), self.dropout, training=self.training))
         return q, self_attn_weights, cross_attn_weights
diff --git a/parakeet/training/__init__.py b/parakeet/training/__init__.py
index aec401c..abf198b 100644
--- a/parakeet/training/__init__.py
+++ b/parakeet/training/__init__.py
@@ -11,6 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from parakeet.training.cli import *
-from parakeet.training.experiment import *
diff --git a/parakeet/training/cli.py b/parakeet/training/cli.py
index a3cfbda..a630994 100644
--- a/parakeet/training/cli.py
+++ b/parakeet/training/cli.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import argparse
 
 
@@ -33,7 +32,6 @@ def default_argument_parser():
     
     The ``--device`` and ``--nprocs`` specifies how to run the training.
     
-    
     See Also
     --------
     parakeet.training.experiment
diff --git a/parakeet/training/default_config.py b/parakeet/training/default_config.py
index 583f6e6..7deb795 100644
--- a/parakeet/training/default_config.py
+++ b/parakeet/training/default_config.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 from yacs.config import CfgNode
 
 _C = CfgNode(
diff --git a/parakeet/training/experiment.py b/parakeet/training/experiment.py
index 5daaf08..892e810 100644
--- a/parakeet/training/experiment.py
+++ b/parakeet/training/experiment.py
@@ -11,9 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-import sys
 import logging
+import sys
 from pathlib import Path
 
 import paddle
@@ -21,7 +20,8 @@ from paddle import distributed as dist
 from paddle.io import DistributedBatchSampler
 from visualdl import LogWriter
 
-from parakeet.utils import checkpoint, mp_tools
+from parakeet.utils import checkpoint
+from parakeet.utils import mp_tools
 
 __all__ = ["ExperimentBase"]
 
diff --git a/parakeet/training/extension.py b/parakeet/training/extension.py
index 57c4f29..07e9269 100644
--- a/parakeet/training/extension.py
+++ b/parakeet/training/extension.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 from typing import Callable
 
 PRIORITY_WRITER = 300
diff --git a/parakeet/training/extensions/evaluator.py b/parakeet/training/extensions/evaluator.py
index 6ebaae6..47b3527 100644
--- a/parakeet/training/extensions/evaluator.py
+++ b/parakeet/training/extensions/evaluator.py
@@ -11,18 +11,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from typing import Dict
 
-from collections import defaultdict
-from typing import Optional, Callable, Dict
-
-from tqdm import tqdm
 import paddle
-from paddle import Tensor
-from paddle.nn import Layer
 from paddle.io import DataLoader
+from paddle.nn import Layer
 
-from parakeet.training.reporter import scope, report, DictSummary
 from parakeet.training import extension
+from parakeet.training.reporter import DictSummary
+from parakeet.training.reporter import report
+from parakeet.training.reporter import scope
 
 
 class StandardEvaluator(extension.Extension):
diff --git a/parakeet/training/extensions/snapshot.py b/parakeet/training/extensions/snapshot.py
index 92d74ef..7806dd6 100644
--- a/parakeet/training/extensions/snapshot.py
+++ b/parakeet/training/extensions/snapshot.py
@@ -11,18 +11,19 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-import os
 import logging
-from pathlib import Path
+import os
 from datetime import datetime
-from typing import List, Dict, Any
+from pathlib import Path
+from typing import Any
+from typing import Dict
+from typing import List
 
 import jsonlines
 
-from parakeet.utils.mp_tools import rank_zero_only
-from parakeet.training.trainer import Trainer
 from parakeet.training import extension
+from parakeet.training.trainer import Trainer
+from parakeet.utils.mp_tools import rank_zero_only
 
 
 def load_records(records_fp):
@@ -56,7 +57,7 @@ class Snapshot(extension.Extension):
         self.max_size = max_size
         self._snapshot_on_error = snapshot_on_error
         self._save_all = (max_size == -1)
-        self.checkpoint_dir =...
+        self.checkpoint_dir = None
 
     def initialize(self, trainer: Trainer):
         """Setting up this extention."""
@@ -107,4 +108,4 @@ class Snapshot(extension.Extension):
         with jsonlines.open(record_path, 'w') as writer:
             for record in self.records:
                 # jsonlines.open may return a Writer or a Reader
-                writer.write(record)  # pylint: disable=no-member 
+                writer.write(record)  # pylint: disable=no-member
diff --git a/parakeet/training/extensions/visualizer.py b/parakeet/training/extensions/visualizer.py
index 138bf1e..1c66ad8 100644
--- a/parakeet/training/extensions/visualizer.py
+++ b/parakeet/training/extensions/visualizer.py
@@ -11,11 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from visualdl import LogWriter
-
-from parakeet.training.trainer import Trainer
 from parakeet.training import extension
+from parakeet.training.trainer import Trainer
 
 
 class VisualDL(extension.Extension):
diff --git a/parakeet/training/reporter.py b/parakeet/training/reporter.py
index c2f171c..013c754 100644
--- a/parakeet/training/reporter.py
+++ b/parakeet/training/reporter.py
@@ -11,9 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-import math
 import contextlib
+import math
 from collections import defaultdict
 
 OBSERVATIONS = None
diff --git a/parakeet/training/seeding.py b/parakeet/training/seeding.py
index 1663d2d..8ca30fd 100644
--- a/parakeet/training/seeding.py
+++ b/parakeet/training/seeding.py
@@ -11,12 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-import random
 import logging
+import random
 
-import paddle
 import numpy as np
+import paddle
 
 
 def seed_everything(seed: int):
diff --git a/parakeet/training/trainer.py b/parakeet/training/trainer.py
index d0a1494..65e2f5e 100644
--- a/parakeet/training/trainer.py
+++ b/parakeet/training/trainer.py
@@ -11,20 +11,23 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import sys
-import six
 import traceback
-from pathlib import Path
 from collections import OrderedDict
-from typing import Callable, Union, List
+from pathlib import Path
+from typing import Callable
+from typing import List
+from typing import Union
 
+import six
 import tqdm
 
-from parakeet.training.trigger import get_trigger, IntervalTrigger, LimitTrigger
-from parakeet.training.updater import UpdaterBase
+from parakeet.training.extension import Extension
+from parakeet.training.extension import PRIORITY_READER
 from parakeet.training.reporter import scope
-from parakeet.training.extension import Extension, PRIORITY_READER
+from parakeet.training.trigger import get_trigger
+from parakeet.training.triggers.limit_trigger import LimitTrigger
+from parakeet.training.updater import UpdaterBase
 
 
 class _ExtensionEntry(object):
@@ -44,7 +47,7 @@ class Trainer(object):
         self.extensions = OrderedDict()
         self.stop_trigger = LimitTrigger(*stop_trigger)
         self.out = Path(out)
-        self.observation =...
+        self.observation = None
 
         self._done = False
         if extensions:
@@ -70,8 +73,7 @@ class Trainer(object):
                 if name is None:
                     name = getattr(extension, '__name__', None)
                     if name is None:
-                        raise ValueError(
-                            "Name is not given for the extension.")
+                        raise ValueError("Name is not given for the extension.")
         if name == 'training':
             raise ValueError("training is a reserved name.")
 
@@ -112,8 +114,7 @@ class Trainer(object):
             self.extensions.keys(),
             key=lambda name: self.extensions[name].priority,
             reverse=True)
-        extensions = [(name, self.extensions[name])
-                      for name in extension_order]
+        extensions = [(name, self.extensions[name]) for name in extension_order]
 
         # initializing all extensions
         for name, entry in extensions:
@@ -126,7 +127,7 @@ class Trainer(object):
         # display only one progress bar
         max_iteration = None
         if isinstance(stop_trigger, LimitTrigger):
-            if stop_trigger.unit is 'epoch':
+            if stop_trigger.unit == 'epoch':
                 max_epoch = self.stop_trigger.limit
                 updates_per_epoch = getattr(self.updater, "updates_per_epoch",
                                             None)
@@ -134,8 +135,7 @@ class Trainer(object):
             else:
                 max_iteration = self.stop_trigger.limit
 
-        p = tqdm.tqdm(
-            initial=self.updater.state.iteration, total=max_iteration)
+        p = tqdm.tqdm(initial=self.updater.state.iteration, total=max_iteration)
 
         try:
             while not stop_trigger(self):
diff --git a/parakeet/training/trigger.py b/parakeet/training/trigger.py
index b588512..f5724c8 100644
--- a/parakeet/training/trigger.py
+++ b/parakeet/training/trigger.py
@@ -11,10 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 from parakeet.training.triggers.interval_trigger import IntervalTrigger
-from parakeet.training.triggers.limit_trigger import LimitTrigger
-from parakeet.training.triggers.time_trigger import TimeTrigger
 
 
 def never_file_trigger(trainer):
diff --git a/parakeet/training/triggers/interval_trigger.py b/parakeet/training/triggers/interval_trigger.py
index e21afdd..98c0368 100644
--- a/parakeet/training/triggers/interval_trigger.py
+++ b/parakeet/training/triggers/interval_trigger.py
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from copy import deepcopy
-
 
 class IntervalTrigger(object):
     """A Predicate to do something every N cycle."""
diff --git a/parakeet/training/updater.py b/parakeet/training/updater.py
index 5ec5eec..1db3d5f 100644
--- a/parakeet/training/updater.py
+++ b/parakeet/training/updater.py
@@ -11,22 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import logging
 from dataclasses import dataclass
-from typing import Optional
-from typing import Dict
-from typing import Union
 
-from timer import timer
 import paddle
-from paddle import Tensor
-from paddle.nn import Layer
-from paddle.optimizer import Optimizer
-from paddle.io import DataLoader
-from paddle.io import DistributedBatchSampler
-
-from parakeet.training.reporter import report
 
 
 @dataclass
diff --git a/parakeet/training/updaters/standard_updater.py b/parakeet/training/updaters/standard_updater.py
index 62751cf..2725bb3 100644
--- a/parakeet/training/updaters/standard_updater.py
+++ b/parakeet/training/updaters/standard_updater.py
@@ -11,23 +11,20 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import logging
-from dataclasses import dataclass
-from typing import Optional
 from typing import Dict
-from typing import Union
+from typing import Optional
 
-from timer import timer
-import paddle
 from paddle import Tensor
-from paddle.nn import Layer
-from paddle.optimizer import Optimizer
 from paddle.io import DataLoader
 from paddle.io import DistributedBatchSampler
+from paddle.nn import Layer
+from paddle.optimizer import Optimizer
+from timer import timer
 
 from parakeet.training.reporter import report
-from parakeet.training.updater import UpdaterBase, UpdaterState
+from parakeet.training.updater import UpdaterBase
+from parakeet.training.updater import UpdaterState
 
 
 class StandardUpdater(UpdaterBase):
@@ -65,34 +62,34 @@ class StandardUpdater(UpdaterBase):
         # We increase the iteration index after updating and before extension.
         # Here are the reasons.
 
-        # 0. Snapshotting(as well as other extensions, like visualizer) is 
+        # 0. Snapshotting(as well as other extensions, like visualizer) is
         #    executed after a step of updating;
-        # 1. We decide to increase the iteration index after updating and 
+        # 1. We decide to increase the iteration index after updating and
         #    before any all extension is executed. 
-        # 3. We do not increase the iteration after extension because we 
-        #    prefer a consistent resume behavior, when load from a 
-        #    `snapshot_iter_100.pdz` then the next step to train is `101`, 
-        #    naturally. But if iteration is increased increased after 
-        #    extension(including snapshot), then, a `snapshot_iter_99` is 
-        #    loaded. You would need a extra increasing of the iteration idex 
-        #    before training to avoid another iteration `99`, which has been 
+        # 3. We do not increase the iteration after extension because we
+        #    prefer a consistent resume behavior, when load from a
+        #    `snapshot_iter_100.pdz` then the next step to train is `101`,
+        #    naturally. But if iteration is increased after
+        #    extension(including snapshot), then, a `snapshot_iter_99` is
+        #    loaded. You would need an extra increase of the iteration index
+        #    before training to avoid another iteration `99`, which has been
         #    done before snapshotting.
-        # 4. Thus iteration index represrnts "currently how mant epochs has 
+        # 4. Thus iteration index represents "currently how many epochs has
         #    been done."
-        # NOTE: use report to capture the correctly value. If you want to 
+        # NOTE: use report to capture the correct value. If you want to
         # report the learning rate used for a step, you must report it before
-        # the learning rate scheduler's step() has been called. In paddle's 
+        # the learning rate scheduler's step() has been called. In paddle's
         # convention, we do not use an extension to change the learning rate.
         # so if you want to report it, do it in the updater.
 
-        # Then here comes the next question. When is the proper time to 
-        # increase the epoch index? Since all extensions are executed after 
-        # updating, it is the time that after updating is the proper time to 
-        # increase epoch index. 
+        # Then here comes the next question. When is the proper time to
+        # increase the epoch index? Since all extensions are executed after
+        # updating, it is the time that after updating is the proper time to
+        # increase epoch index.
         # 1. If we increase the epoch index before updating, then an extension
-        #    based ot epoch would miss the correct timing. It could only be 
+        #    based on epoch would miss the correct timing. It could only be
         #    triggerd after an extra updating.
-        # 2. Theoretically, when an epoch is done, the epoch index should be 
+        # 2. Theoretically, when an epoch is done, the epoch index should be
         #    increased. So it would be increase after updating.
         # 3. Thus, eppoch index represents "currently how many epochs has been
         #    done." So it starts from 0.
@@ -140,7 +137,7 @@ class StandardUpdater(UpdaterBase):
 
     @property
     def updates_per_epoch(self):
-        """Number of updater per epoch, determined by the length of the 
+        """Number of updates per epoch, determined by the length of the
         dataloader."""
         length_of_dataloader = None
         try:
diff --git a/parakeet/utils/__init__.py b/parakeet/utils/__init__.py
index a3bd0dc..abf198b 100644
--- a/parakeet/utils/__init__.py
+++ b/parakeet/utils/__init__.py
@@ -11,5 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from . import checkpoint, layer_tools, scheduler, display, mp_tools
diff --git a/parakeet/utils/checkpoint.py b/parakeet/utils/checkpoint.py
index 0d2a2e2..8df791b 100644
--- a/parakeet/utils/checkpoint.py
+++ b/parakeet/utils/checkpoint.py
@@ -11,14 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import os
-import time
-import numpy as np
+
 import paddle
 from paddle import distributed as dist
-from paddle.nn import Layer
-from paddle.optimizer import Optimizer
 
 from parakeet.utils import mp_tools
 
@@ -66,7 +62,7 @@ def load_parameters(model,
                     optimizer=None,
                     checkpoint_dir=None,
                     checkpoint_path=None):
-    """Load a specific model checkpoint from disk. 
+    """Load a specific model checkpoint from disk.
 
     Args:
         model (Layer): model to load parameters.
@@ -74,8 +70,8 @@ def load_parameters(model,
             Defaults to None.
         checkpoint_dir (str, optional): the directory where checkpoint is saved.
         checkpoint_path (str, optional): if specified, load the checkpoint
-            stored in the checkpoint_path and the argument 'checkpoint_dir' will 
-            be ignored. Defaults to None. 
+            stored in the checkpoint_path and the argument 'checkpoint_dir' will
+            be ignored. Defaults to None.
 
     Returns:
         iteration (int): number of iterations that the loaded checkpoint has 
@@ -137,7 +133,6 @@ def save_parameters(checkpoint_dir, iteration, model, optimizer=None):
         opt_dict = optimizer.state_dict()
         optimizer_path = checkpoint_path + ".pdopt"
         paddle.save(opt_dict, optimizer_path)
-        print("[checkpoint] Saved optimzier state to {}".format(
-            optimizer_path))
+        print("[checkpoint] Saved optimzier state to {}".format(optimizer_path))
 
     _save_checkpoint(checkpoint_dir, iteration)
diff --git a/parakeet/utils/display.py b/parakeet/utils/display.py
index faf27e7..af7d44e 100644
--- a/parakeet/utils/display.py
+++ b/parakeet/utils/display.py
@@ -11,13 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-import numpy as np
-import matplotlib
-import librosa
 import librosa.display
 import matplotlib.pylab as plt
-from matplotlib import cm, pyplot
 
 __all__ = [
     "plot_alignment",
diff --git a/parakeet/utils/h5_utils.py b/parakeet/utils/h5_utils.py
index cd0c670..d0e277d 100644
--- a/parakeet/utils/h5_utils.py
+++ b/parakeet/utils/h5_utils.py
@@ -11,11 +11,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from pathlib import Path
-from typing import Union, Any
-import sys
 import logging
+import sys
+from pathlib import Path
+from typing import Any
+from typing import Union
+
 import h5py
 import numpy as np
 
@@ -44,8 +45,7 @@ def read_hdf5(filename: Union[Path, str], dataset_name: str) -> Any:
     hdf5_file = h5py.File(filename, "r")
 
     if dataset_name not in hdf5_file:
-        logging.error(
-            f"There is no such a data in hdf5 file. ({dataset_name})")
+        logging.error(f"There is no such a data in hdf5 file. ({dataset_name})")
         sys.exit(1)
 
     # [()]: a special syntax of h5py to get the dataset as-is
diff --git a/parakeet/utils/internals.py b/parakeet/utils/internals.py
index 968a604..6c10bd2 100644
--- a/parakeet/utils/internals.py
+++ b/parakeet/utils/internals.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import numpy as np
 from paddle.framework import core
 
diff --git a/parakeet/utils/layer_tools.py b/parakeet/utils/layer_tools.py
index fcda44f..6e971f9 100644
--- a/parakeet/utils/layer_tools.py
+++ b/parakeet/utils/layer_tools.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import numpy as np
 from paddle import nn
 
@@ -42,7 +41,7 @@ def recursively_remove_weight_norm(layer: nn.Layer):
     for layer in layer.sublayers():
         try:
             nn.utils.remove_weight_norm(layer)
-        except:
+        except Exception as e:
             # ther is not weight norm hoom in this layer
             pass
 
diff --git a/parakeet/utils/mp_tools.py b/parakeet/utils/mp_tools.py
index edc1845..ed8c83e 100644
--- a/parakeet/utils/mp_tools.py
+++ b/parakeet/utils/mp_tools.py
@@ -11,11 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-import paddle
-from paddle import distributed as dist
 from functools import wraps
 
+from paddle import distributed as dist
+
 __all__ = ["rank_zero_only"]
 
 
diff --git a/parakeet/utils/profile.py b/parakeet/utils/profile.py
index cfffb4b..5f9b495 100644
--- a/parakeet/utils/profile.py
+++ b/parakeet/utils/profile.py
@@ -11,11 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from contextlib import contextmanager
 
 import paddle
 from paddle.framework import core
 from paddle.framework import CUDAPlace
-from contextlib import contextmanager
 
 
 def synchronize():
diff --git a/parakeet/utils/scheduler.py b/parakeet/utils/scheduler.py
index 4d41aca..9338995 100644
--- a/parakeet/utils/scheduler.py
+++ b/parakeet/utils/scheduler.py
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import math
-
 __all__ = ["SchedulerBase", "Constant", "PieceWise", "StepWise"]
 
 
diff --git a/parakeet/utils/timeline.py b/parakeet/utils/timeline.py
index 119a2e9..0a5509d 100644
--- a/parakeet/utils/timeline.py
+++ b/parakeet/utils/timeline.py
@@ -11,15 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import argparse
 import json
-import six
-import sys
-import unittest
 
-import google.protobuf.text_format as text_format
 import paddle.fluid.proto.profiler.profiler_pb2 as profiler_pb2
+import six
 
 parser = argparse.ArgumentParser(description=__doc__)
 parser.add_argument(
@@ -167,19 +163,19 @@ class Timeline(object):
                     if (k, mevent.device_id, "GPU") not in self._mem_devices:
                         pid = self._allocate_pid()
                         self._mem_devices[(k, mevent.device_id, "GPU")] = pid
-                        self._chrome_trace.emit_pid("memory usage on %s:gpu:%d"
-                                                    % (k, mevent.device_id),
-                                                    pid)
+                        self._chrome_trace.emit_pid(
+                            "memory usage on %s:gpu:%d" % (k, mevent.device_id),
+                            pid)
                 elif mevent.place == profiler_pb2.MemEvent.CPUPlace:
                     if (k, mevent.device_id, "CPU") not in self._mem_devices:
                         pid = self._allocate_pid()
                         self._mem_devices[(k, mevent.device_id, "CPU")] = pid
-                        self._chrome_trace.emit_pid("memory usage on %s:cpu:%d"
-                                                    % (k, mevent.device_id),
-                                                    pid)
+                        self._chrome_trace.emit_pid(
+                            "memory usage on %s:cpu:%d" % (k, mevent.device_id),
+                            pid)
                 elif mevent.place == profiler_pb2.MemEvent.CUDAPinnedPlace:
-                    if (k, mevent.device_id, "CUDAPinnedPlace"
-                        ) not in self._mem_devices:
+                    if (k, mevent.device_id,
+                            "CUDAPinnedPlace") not in self._mem_devices:
                         pid = self._allocate_pid()
                         self._mem_devices[(k, mevent.device_id,
                                            "CUDAPinnedPlace")] = pid
@@ -190,9 +186,9 @@ class Timeline(object):
                     if (k, mevent.device_id, "NPU") not in self._mem_devices:
                         pid = self._allocate_pid()
                         self._mem_devices[(k, mevent.device_id, "NPU")] = pid
-                        self._chrome_trace.emit_pid("memory usage on %s:npu:%d"
-                                                    % (k, mevent.device_id),
-                                                    pid)
+                        self._chrome_trace.emit_pid(
+                            "memory usage on %s:npu:%d" % (k, mevent.device_id),
+                            pid)
                 if (k, 0, "CPU") not in self._mem_devices:
                     pid = self._allocate_pid()
                     self._mem_devices[(k, 0, "CPU")] = pid
@@ -273,14 +269,14 @@ class Timeline(object):
             total_size = 0
             while i < len(mem_list):
                 total_size += mem_list[i]['size']
-                while i < len(mem_list) - 1 and mem_list[i][
-                        'time'] == mem_list[i + 1]['time']:
+                while i < len(mem_list) - 1 and mem_list[i]['time'] == mem_list[
+                        i + 1]['time']:
                     total_size += mem_list[i + 1]['size']
                     i += 1
 
                 self._chrome_trace.emit_counter(
-                    "Memory", "Memory", mem_list[i]['pid'],
-                    mem_list[i]['time'], 0, total_size)
+                    "Memory", "Memory", mem_list[i]['pid'], mem_list[i]['time'],
+                    0, total_size)
                 i += 1
 
     def generate_chrome_trace(self):
diff --git a/setup.py b/setup.py
index 7408415..4123f3f 100644
--- a/setup.py
+++ b/setup.py
@@ -11,12 +11,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-import os
 import io
+import os
 import re
-import sys
-from setuptools import setup, find_packages
+
+from setuptools import find_packages
+from setuptools import setup
 
 
 def read(*names, **kwargs):
@@ -80,7 +80,9 @@ setup_info = dict(
         'jieba',
         "phkit",
     ],
-    extras_require={'doc': ["sphinx", "sphinx-rtd-theme", "numpydoc"], },
+    extras_require={
+        'doc': ["sphinx", "sphinx-rtd-theme", "numpydoc"],
+    },
 
     # Package info
     packages=find_packages(exclude=('tests', 'tests.*')),
diff --git a/tests/unit/test_data_table.py b/tests/unit/test_data_table.py
index aca0605..3664ea3 100644
--- a/tests/unit/test_data_table.py
+++ b/tests/unit/test_data_table.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 from parakeet.datasets.data_tabel import DataTable
 
 
diff --git a/tests/unit/test_expansion.py b/tests/unit/test_expansion.py
index d548993..418e9ba 100644
--- a/tests/unit/test_expansion.py
+++ b/tests/unit/test_expansion.py
@@ -11,8 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import paddle
+
 from parakeet.modules import expansion
 
 
diff --git a/tests/unit/test_optimizer.py b/tests/unit/test_optimizer.py
index bdb3d96..74f5036 100644
--- a/tests/unit/test_optimizer.py
+++ b/tests/unit/test_optimizer.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import shutil
 from pathlib import Path
 
diff --git a/tests/unit/test_pwg.py b/tests/unit/test_pwg.py
index 0978714..2f07a4a 100644
--- a/tests/unit/test_pwg.py
+++ b/tests/unit/test_pwg.py
@@ -11,18 +11,21 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import paddle
 import torch
-from timer import timer
-from parallel_wavegan.layers import upsample, residual_block
+from parallel_wavegan.layers import residual_block
+from parallel_wavegan.layers import upsample
 from parallel_wavegan.models import parallel_wavegan as pwgan
+from timer import timer
+
+from parakeet.models.parallel_wavegan import ConvInUpsampleNet
+from parakeet.models.parallel_wavegan import PWGDiscriminator
+from parakeet.models.parallel_wavegan import PWGGenerator
+from parakeet.models.parallel_wavegan import ResidualBlock
+from parakeet.models.parallel_wavegan import ResidualPWGDiscriminator
 from parakeet.utils.layer_tools import summary
 from parakeet.utils.profile import synchronize
 
-from parakeet.models.parallel_wavegan import ConvInUpsampleNet, ResidualBlock
-from parakeet.models.parallel_wavegan import PWGGenerator, PWGDiscriminator, ResidualPWGDiscriminator
-
 paddle.set_device("gpu:0")
 device = torch.device("cuda:0")
 
diff --git a/tests/unit/test_raise.py b/tests/unit/test_raise.py
index a4a5e70..7abdadf 100644
--- a/tests/unit/test_raise.py
+++ b/tests/unit/test_raise.py
@@ -11,14 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-import math
-import numpy as np
-
 import paddle
-from paddle import Tensor
 from paddle.static import InputSpec
-from paddle.nn import functional as F
 
 
 def sinusoid_position_encoding(num_positions: int,
@@ -52,6 +46,5 @@ def call_it(x):
 
 call_it(paddle.randn([8, 32]))
 m = paddle.jit.to_static(
-    call_it, input_spec=[InputSpec(
-        [-1, -1], dtype=paddle.int32)])
+    call_it, input_spec=[InputSpec([-1, -1], dtype=paddle.int32)])
 m(paddle.randn([8, 32]).astype(paddle.int32))
diff --git a/tests/unit/test_reporter.py b/tests/unit/test_reporter.py
index cd40364..bba81d6 100644
--- a/tests/unit/test_reporter.py
+++ b/tests/unit/test_reporter.py
@@ -11,10 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import numpy as np
-from parakeet.training.reporter import report, scope
-from parakeet.training.reporter import Summary, DictSummary
+
+from parakeet.training.reporter import report
+from parakeet.training.reporter import scope
+from parakeet.training.reporter import Summary
 
 
 def test_reporter_scope():
diff --git a/tests/unit/test_snapshot.py b/tests/unit/test_snapshot.py
index 71e422c..e940a81 100644
--- a/tests/unit/test_snapshot.py
+++ b/tests/unit/test_snapshot.py
@@ -11,19 +11,15 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from pathlib import Path
 import shutil
-
-import numpy as np
-import paddle
-from paddle import nn
-from paddle.optimizer import Adam
 from itertools import count
 
-from parakeet.training.updater import StandardUpdater
-from parakeet.training.trainer import Trainer
+from paddle import nn
+from paddle.optimizer import Adam
+
 from parakeet.training.extensions.snapshot import Snapshot
+from parakeet.training.trainer import Trainer
+from parakeet.training.updater import StandardUpdater
 
 
 def test_snapshot():
diff --git a/tests/unit/test_stft.py b/tests/unit/test_stft.py
index c985235..8e6ce47 100644
--- a/tests/unit/test_stft.py
+++ b/tests/unit/test_stft.py
@@ -11,15 +11,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-import paddle
-import torch
 import librosa
 import numpy as np
-from parakeet.modules.stft_loss import STFT, MultiResolutionSTFTLoss
+import paddle
+import torch
 from parallel_wavegan.losses import stft_loss as sl
 from scipy import signal
 
+from parakeet.modules.stft_loss import MultiResolutionSTFTLoss
+from parakeet.modules.stft_loss import STFT
+
 
 def test_stft():
     stft = STFT(n_fft=1024, hop_length=256, win_length=1024)
@@ -34,8 +35,7 @@ def test_stft():
         window=torch.as_tensor(window))
     S2 = (D2**2).sum(-1).sqrt()
     S3 = np.abs(
-        librosa.stft(
-            x.numpy()[0], n_fft=1024, hop_length=256, win_length=1024))
+        librosa.stft(x.numpy()[0], n_fft=1024, hop_length=256, win_length=1024))
     print(S2.shape)
     print(S.numpy()[0])
     print(S2.data.cpu().numpy()[0])
diff --git a/tests/unit/test_to_static.py b/tests/unit/test_to_static.py
index 251d492..b8ff300 100644
--- a/tests/unit/test_to_static.py
+++ b/tests/unit/test_to_static.py
@@ -11,12 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import math
 
 import paddle
 from paddle import nn
-from paddle.jit import to_static, save
+from paddle.jit import to_static
 from paddle.static import InputSpec