diff --git a/examples/use_mfa/local/generate_lexicon.py b/examples/use_mfa/local/generate_lexicon.py
index 5fb00e5..1791e7b 100644
--- a/examples/use_mfa/local/generate_lexicon.py
+++ b/examples/use_mfa/local/generate_lexicon.py
@@ -11,6 +11,15 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""Generate lexicon and symbols for Mandarin Chinese phonology.
+The lexicon is used by the Montreal Forced Aligner (MFA).
+
+Note that syllables are used as words in this lexicon, since syllables rather
+than words are used in the transcriptions produced by `reorganize_baker.py`.
+
+We make this choice to better leverage existing Chinese text-to-pinyin tools
+such as pypinyin. This is the convention for G2P in Chinese.
+"""
 
 import re
 import argparse
@@ -32,55 +41,68 @@ SPECIALS = ['sil', 'sp']
 
 
 def rule(C, V, R, T):
+    """Generate a syllable given the initial, the final, erhua indicator, and tone.
+    Orthographical rules for pinyin are applied. (special case for y, w, ui, un, iu)
+    
+    Note that in this system, 'ü' is always written as 'v' when it appears in a phoneme, but is
+    converted to 'u' in syllables when certain conditions are satisfied.
+    
+    'i' is distinguished when it appears in phonemes, and is separated into 3 categories: 'i', 'ii' and 'iii'.
+
+    Erhua can be applied to every final, except for finals that already end with 'r'.
+
+    When a syllable is impossible or does not have any characters with this pronunciation, return None
+    to filter it out.
+    """
 
     # 不可拼的音节, ii 只能和 z, c, s 拼
     if V in ["ii"] and (C not in ['z', 'c', 's']):
-        return
+        return None
     # iii 只能和 zh, ch, sh, r 拼
     if V in ['iii'] and (C not in ['zh', 'ch', 'sh', 'r']):
-        return
+        return None
 
     # 齐齿呼或者撮口呼不能和 f, g, k, h, zh, ch, sh, r, z, c, s
     if (V not in ['ii', 'iii']) and V[0] in ['i', 'v'] and (
             C in ['f', 'g', 'k', 'h', 'zh', 'ch', 'sh', 'r', 'z', 'c', 's']):
-        return
+        return None
 
     # 撮口呼只能和 j, q, x l, n 拼
     if V.startswith("v"):
         # v, ve 只能和 j ,q , x, n, l 拼
         if V in ['v', 've']:
             if C not in ['j', 'q', 'x', 'n', 'l', '']:
-                return
+                return None
         # 其他只能和 j, q, x 拼
         else:
             if C not in ['j', 'q', 'x', '']:
-                return
+                return None
 
     # j, q, x 只能和齐齿呼或者撮口呼拼
     if (C in ['j', 'q', 'x']) and not (
         (V not in ['ii', 'iii']) and V[0] in ['i', 'v']):
-        return
+        return None
 
     # b, p ,m, f 不能和合口呼拼，除了 u 之外
     # bm p, m, f 不能和撮口呼拼
     if (C in ['b', 'p', 'm', 'f']) and ((V[0] in ['u', 'v'] and V != "u") or
                                         V == 'ong'):
-        return
+        return None
 
     # ua, uai, uang 不能和 d, t, n, l, r, z, c, s 拼
     if V in ['ua', 'uai', 'uang'
              ] and C in ['d', 't', 'n', 'l', 'r', 'z', 'c', 's']:
-        return
+        return None
 
     # sh 和 ong 不能拼
     if V == 'ong' and C in ['sh']:
-        return
+        return None
 
     # o 和 gkh, zh ch sh r z c s 不能拼
     if V == "o" and C in [
             'd', 't', 'n', 'g', 'k', 'h', 'zh', 'ch', 'sh', 'r', 'z', 'c', 's'
     ]:
-        return
+        return None
 
     # ueng 只是 weng 这个 ad-hoc 其他情况下都是 ong
     if V == 'ueng' and C != '':
@@ -88,7 +110,7 @@ def rule(C, V, R, T):
 
     # 非儿化的 er 只能单独存在
     if V == 'er' and C != '':
-        return
+        return None
 
     if C == '':
         if V in ["i", "in", "ing"]:
@@ -118,7 +140,7 @@ def rule(C, V, R, T):
 
     # Filter  er 不能再儿化
     if result.endswith('r') and R == 'r':
-        return
+        return None
 
     # ii and iii, change back to i
     result = re.sub(r'i+', 'i', result)
@@ -127,13 +149,13 @@ def rule(C, V, R, T):
     return result
 
 
-def generate_lexicon(with_tone=False, with_r=False):
-    # generate lexicon withou tone and erhua
+def generate_lexicon(with_tone=False, with_erhua=False):
+    """Generate lexicon for Mandarin Chinese."""
     syllables = OrderedDict()
 
     for C in [''] + INITIALS:
         for V in FINALS:
-            for R in [''] if not with_r else ['', 'r']:
+            for R in [''] if not with_erhua else ['', 'r']:
                 for T in [''] if not with_tone else ['1', '2', '3', '4', '5']:
                     result = rule(C, V, R, T)
                     if result:
@@ -142,11 +164,12 @@ def generate_lexicon(with_tone=False, with_r=False):
 
 
 def generate_symbols(lexicon):
+    """Generate phoneme list for a lexicon."""
     symbols = set()
     for p in SPECIALS:
         symbols.add(p)
-    for syllable, phonems in lexicon.items():
-        phonemes = phonems.split()
+    for syllable, phonemes in lexicon.items():
+        phonemes = phonemes.split()
         for p in phonemes:
             symbols.add(p)
     return sorted(list(symbols))
diff --git a/examples/use_mfa/local/recorganize_baker.py b/examples/use_mfa/local/reorganize_baker.py
similarity index 88%
rename from examples/use_mfa/local/recorganize_baker.py
rename to examples/use_mfa/local/reorganize_baker.py
index b1f6b14..fb41751 100644
--- a/examples/use_mfa/local/recorganize_baker.py
+++ b/examples/use_mfa/local/reorganize_baker.py
@@ -11,16 +11,27 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""Script to reorganize the Baker dataset so that Montreal Forced Aligner
+can be used to align transcriptions and audio.
+
+Please refer to https://montreal-forced-aligner.readthedocs.io/en/latest/data_prep.html
+for more details about Montreal Forced Aligner's requirements on corpora.
+
+For scripts to reorganize other corpora, please refer to
+https://github.com/MontrealCorpusTools/MFA-reorganization-scripts
+for more details.
+"""
 
-from typing import Union
-from concurrent.futures import ThreadPoolExecutor
-from pathlib import Path
-import soundfile as sf
-import librosa
-from tqdm import tqdm
 import os
 import shutil
 import argparse
+from typing import Union
+from pathlib import Path
+from concurrent.futures import ThreadPoolExecutor
+
+import librosa
+import soundfile as sf
+from tqdm import tqdm
 
 
 def get_transcripts(path: Union[str, Path]):
@@ -54,7 +65,7 @@ def reorganize_baker(root_dir: Union[str, Path],
     transcriptions = get_transcripts(transcript_path)
 
     wave_dir = root_dir / "Wave"
-    wav_paths = list(wave_dir.glob("*.wav"))
+    wav_paths = sorted(list(wave_dir.glob("*.wav")))
     output_dir = Path(output_dir).expanduser()
     assert wave_dir != output_dir, "Don't use an the original wav's directory as output_dir"
 
diff --git a/examples/use_mfa/run.sh b/examples/use_mfa/run.sh
index 445a90a..c339408 100644
--- a/examples/use_mfa/run.sh
+++ b/examples/use_mfa/run.sh
@@ -11,7 +11,9 @@ fi
 if [ ! -d $EXP_DIR/baker_corpus ]; then
     echo "reorganizing baker corpus..."
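-    python local/recorganize_baker.py --root-dir=~/datasets/BZNSYP --output-dir=$EXP_DIR/baker_corpus --resample-audio
+    python local/reorganize_baker.py --root-dir=~/datasets/BZNSYP --output-dir=$EXP_DIR/baker_corpus --resample-audio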
-    echo "reorganization done."
+    echo "reorganization done. Check output in $EXP_DIR/baker_corpus."
+    echo "audio files are resampled to 16kHz"
+    echo "transcription for each audio file is saved with the same name in $EXP_DIR/baker_corpus"
 fi
 
 echo "detecting oov..."
@@ -37,7 +39,9 @@ export PATH="$MFA_DOWNLOAD_DIR/montreal-forced-aligner/bin"
 if [ ! -d "$EXP_DIR/baker_alignment" ]; then
     echo "Start MFA training..."
     mfa_train_and_align $EXP_DIR/baker_corpus "$EXP_DIR/$LEXICON_NAME.lexicon" $EXP_DIR/baker_alignment -o $EXP_DIR/baker_model --clean --verbose --temp_directory exp/.mfa_train_and_align
-    echo "training done! \nresults: $EXP_DIR/baker_alignment \nmodel: $EXP_DIR/baker_model\n"
+    echo "training done!"
+    echo "results: $EXP_DIR/baker_alignment"
+    echo "model: $EXP_DIR/baker_model"
 fi