simplify text processing code and update notebook

2021-05-13 17:06:34 +08:00 · 2021-05-13 17:06:34 +08:00 · e1a7c296fe
parent 6a1fb158d9
commit e1a7c296fe
5 changed files with 19 additions and 196 deletions
--- a/examples/tacotron2_aishell3/chinese_g2p.py
+++ b/examples/tacotron2_aishell3/chinese_g2p.py
@ -13,9 +13,17 @@
 # limitations under the License.

 from typing import List, Tuple
+from pypinyin import lazy_pinyin, Style
+from preprocess_transcription import split_syllable

-from chinese_text_to_pinyin import convert_to_pinyin
-from chinese_phonology import split_syllable
+
+def convert_to_pinyin(text: str) -> List[str]:
+    """Convert text into a list of syllables. Characters that are not Chinese, and
+    thus cannot be converted to pinyin, are split.
+    """
+    syllables = lazy_pinyin(
+        text, style=Style.TONE3, neutral_tone_with_five=True)
+    return syllables


 def convert_sentence(text: str) -> List[Tuple[str]]:
--- a/examples/tacotron2_aishell3/chinese_phonology.py
+++ b/examples/tacotron2_aishell3/chinese_phonology.py
@ -1,158 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-A pinyin to phone transcription system for chinese.
-Syllables are splited as initial and final. 'er' is also treated as s special symbol.
-Tones are extracted and attached to finals.
-"""
-import re
-
-# initials for mandarin chinese
-# zero initials are not included
-_initials = {
-    "b", "p", "m", "f", "d", "t", "n", "l", "g", "k", "h", "j", "q", "x", "zh",
-    "ch", "sh", "r", "z", "c", "s"
-}
-
-# finals for mandarin chines
-# some symbols with different pronunciations are discriminated
-# e.g. i -> {i, ii, iii}
-# some symbols that are abbreviated are expanded
-# e.g. iu -> iou, ui -> uei, un -> uen, bo -> b uo
-# some symbols are transcripted according to zhuyin scheme
-# e,g, in -> ien, ong -> ueng, iong -> veng
-# üis  always replaced by v
-_finals = {
-    'ii',
-    'iii',
-    'a',
-    'o',
-    'e',
-    'ea',
-    'ai',
-    'ei',
-    'ao',
-    'ou',
-    'an',
-    'en',
-    'ang',
-    'eng',
-    'er',
-    'i',
-    'ia',
-    'io',
-    'ie',
-    'iai',
-    'iao',
-    'iou',
-    'ian',
-    'ien',
-    'iang',
-    'ieng',
-    'u',
-    'ua',
-    'uo',
-    'uai',
-    'uei',
-    'uan',
-    'uen',
-    'uang',
-    'ueng',
-    'v',
-    've',
-    'van',
-    'ven',
-    'veng',
-}
-
-# Er hua symbol
-# example tour2 -> phone: t ou &r, tone: 0 2 5
-_ernized_symbol = {'&r'}
-
-_specials = {'<pad>', '<unk>'}
-_pauses = {"%",
-           "$"}  # for different dataset, maybe you have to change this set
-
-_phones = _initials | _finals | _ernized_symbol | _specials | _pauses
-
-# 0: no tone, for initials
-# {1, 2, 3, 4}: for tones in chinese
-# 5: neutral tone
-# <pad>: special token for padding
-# <unk>: special token for unknown tone, though there will not be unknown tone
-_tones = {'<pad>', '<unk>', '0', '1', '2', '3', '4', '5'}
-
-
-def ernized(syllable):
-    return syllable[:2] != "er" and syllable[-2] == 'r'
-
-
-def convert(syllable):
-    # expansion of o -> uo
-    syllable = re.sub(r"([bpmf])o$", r"\1uo", syllable)
-    # syllable = syllable.replace("bo", "buo").replace("po", "puo").replace("mo", "muo").replace("fo", "fuo")
-    # expansion for iong, ong
-    syllable = syllable.replace("iong", "veng").replace("ong", "ueng")
-
-    # expansion for ing, in
-    syllable = syllable.replace("ing", "ieng").replace("in", "ien")
-
-    # expansion for un, ui, iu
-    syllable = syllable.replace("un", "uen").replace(
-        "ui", "uei").replace("iu", "iou")
-
-    # rule for variants of i
-    syllable = syllable.replace("zi", "zii").replace("ci", "cii").replace("si", "sii")\
-        .replace("zhi", "zhiii").replace("chi", "chiii").replace("shi", "shiii")\
-        .replace("ri", "riii")
-
-    # rule for y preceding i, u
-    syllable = syllable.replace("yi", "i").replace("yu", "v").replace("y", "i")
-
-    # rule for w
-    syllable = syllable.replace("wu", "u").replace("w", "u")
-
-    # rule for v following j, q, x
-    syllable = syllable.replace("ju", "jv").replace("qu",
-                                                    "qv").replace("xu", "xv")
-
-    return syllable
-
-
-def split_syllable(syllable: str):
-    if syllable in _pauses:
-        # phone, tone
-        return [syllable], ['0']
-
-    tone = syllable[-1]
-    syllable = convert(syllable[:-1])
-
-    phones = []
-    tones = []
-
-    global _initials
-    if syllable[:2] in _initials:
-        phones.append(syllable[:2])
-        tones.append('0')
-        phones.append(syllable[2:])
-        tones.append(tone)
-    elif syllable[0] in _initials:
-        phones.append(syllable[0])
-        tones.append('0')
-        phones.append(syllable[1:])
-        tones.append(tone)
-    else:
-        phones.append(syllable)
-        tones.append(tone)
-    return phones, tones
--- a/examples/tacotron2_aishell3/chinese_text_to_pinyin.py
+++ b/examples/tacotron2_aishell3/chinese_text_to_pinyin.py
@ -1,26 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import List
-
-from pypinyin import lazy_pinyin, Style
-
-
-def convert_to_pinyin(text: str) -> List[str]:
-    """convert text into list of syllables, other characters that are not chinese, thus
-    cannot be converted to pinyin are splited.
-    """
-    syllables = lazy_pinyin(
-        text, style=Style.TONE3, neutral_tone_with_five=True)
-    return syllables
--- a/examples/tacotron2_aishell3/voice_cloning.ipynb
+++ b/examples/tacotron2_aishell3/voice_cloning.ipynb
--- a/examples/transformer_tts/synthesize.py
+++ b/examples/transformer_tts/synthesize.py
@ -26,7 +26,6 @@ from parakeet.utils import display
 from config import get_cfg_defaults


-@paddle.no_grad()
 def main(config, args):
    paddle.set_device(args.device)