# Reconstructed from a git format-patch whose line breaks had been collapsed.
#
#   commit:  b674f63d747a80a95cde6a5c293f6f10c1bc6193
#   From:    iclementine
#   Date:    Thu, 8 Apr 2021 04:59:29 +0800
#   Subject: [PATCH] add 2 frontend
#
# The patch creates two modules, reproduced below as two sections:
#   parakeet/frontend/arpabet.py
#   parakeet/frontend/pinyin.py

# =============================================================================
# parakeet/frontend/arpabet.py
# =============================================================================
"""
A phonology system with ARPABET symbols and limited punctuations. The G2P
conversion is done by g2p_en.

Note that g2p_en does not handle words with hyphens well. So make sure the
input sentence is first normalized.
"""
from g2p_en import G2p

from parakeet.frontend.phonectic import Phonetics
from parakeet.frontend.vocab import Vocab


class ARPABET(Phonetics):
    """A phonology for English that uses ARPABET as the phoneme vocabulary.
    Stress markers are stripped, so every vowel maps to a single symbol.

    See http://www.speech.cs.cmu.edu/cgi-bin/cmudict for more details.

        Phoneme Example Translation
        ------- ------- -----------
        AA      odd     AA D
        AE      at      AE T
        AH      hut     HH AH T
        AO      ought   AO T
        AW      cow     K AW
        AY      hide    HH AY D
        B       be      B IY
        CH      cheese  CH IY Z
        D       dee     D IY
        DH      thee    DH IY
        EH      Ed      EH D
        ER      hurt    HH ER T
        EY      ate     EY T
        F       fee     F IY
        G       green   G R IY N
        HH      he      HH IY
        IH      it      IH T
        IY      eat     IY T
        JH      gee     JH IY
        K       key     K IY
        L       lee     L IY
        M       me      M IY
        N       knee    N IY
        NG      ping    P IH NG
        OW      oat     OW T
        OY      toy     T OY
        P       pee     P IY
        R       read    R IY D
        S       sea     S IY
        SH      she     SH IY
        T       tea     T IY
        TH      theta   TH EY T AH
        UH      hood    HH UH D
        UW      two     T UW
        V       vee     V IY
        W       we      W IY
        Y       yield   Y IY L D
        Z       zee     Z IY
        ZH      seizure S IY ZH ER
    """
    # NOTE: the order of this list fixes the id of every symbol in the vocab
    # ('UW' intentionally stays before 'UH', as in the original).
    phonemes = [
        'AA', 'AE', 'AH', 'AO', 'AW', 'AY', 'B', 'CH', 'D', 'DH', 'EH', 'ER',
        'EY', 'F', 'G', 'HH', 'IH', 'IY', 'JH', 'K', 'L', 'M', 'N', 'NG', 'OW',
        'OY', 'P', 'R', 'S', 'SH', 'T', 'TH', 'UW', 'UH', 'V', 'W', 'Y', 'Z',
        'ZH'
    ]
    punctuations = [',', '.', '?', '!']
    symbols = phonemes + punctuations

    # Maps every stressed vowel ('AA0', 'AA1', 'AA2', ...) to its bare form
    # ('AA'). Consonants are not listed because they carry no stress digit.
    _stress_to_no_stress_ = {
        vowel + stress: vowel
        for vowel in ('AA', 'AE', 'AH', 'AO', 'AW', 'AY', 'EH', 'ER', 'EY',
                      'IH', 'IY', 'OW', 'OY', 'UH', 'UW')
        for stress in '012'
    }

    def __init__(self):
        self.backend = G2p()
        self.vocab = Vocab(self.phonemes + self.punctuations)

    def _remove_vowels(self, phone):
        """Strip the stress digit from a vowel; other symbols pass through.

        The name is historical: nothing is removed except the stress marker.
        """
        return self._stress_to_no_stress_.get(phone, phone)

    def phoneticize(self, sentence, add_start_end=False):
        """ Normalize the input text sequence and convert it into
        pronunciation sequence.

        Parameters
        -----------
        sentence: str
            The input text sequence.
        add_start_end: bool
            Whether to wrap the sequence with the vocab's start and end
            symbols. Defaults to False.

        Returns
        ----------
        List[str]
            The list of pronunciation sequence. Symbols that are not in the
            vocabulary are dropped.
        """
        phonemes = [
            self._remove_vowels(item) for item in self.backend(sentence)
        ]
        if add_start_end:
            start = self.vocab.start_symbol
            end = self.vocab.end_symbol
            phonemes = [start] + phonemes + [end]
        # Filter last so that anything the backend emits outside the
        # vocabulary never reaches numericalize().
        phonemes = [item for item in phonemes if item in self.vocab.stoi]
        return phonemes

    def numericalize(self, phonemes):
        """ Convert pronunciation sequence into pronunciation id sequence.

        Parameters
        -----------
        phonemes: List[str]
            The list of pronunciation sequence.

        Returns
        ----------
        List[int]
            The list of pronunciation id sequence.
        """
        return [self.vocab.lookup(item) for item in phonemes]

    def reverse(self, ids):
        """ Reverse the list of pronunciation id sequence to a list of
        pronunciation sequence.

        Parameters
        -----------
        ids: List[int]
            The list of pronunciation id sequence.

        Returns
        ----------
        List[str]
            The list of pronunciation sequence.
        """
        return [self.vocab.reverse(i) for i in ids]

    def __call__(self, sentence, add_start_end=False):
        """ Convert the input text sequence into pronunciation id sequence.

        Parameters
        -----------
        sentence: str
            The input text sequence.
        add_start_end: bool
            Whether to wrap the sequence with the vocab's start and end
            symbols. Defaults to False.

        Returns
        ----------
        List[int]
            The list of pronunciation id sequence.
        """
        return self.numericalize(
            self.phoneticize(sentence, add_start_end=add_start_end))

    @property
    def vocab_size(self):
        """ Vocab size.
        """
        # 47 = 39 phones + 4 punctuations + 4 special tokens
        return len(self.vocab)


class ARPABETWithStress(Phonetics):
    """A phonology for English that uses ARPABET with stress markers.

    Unlike ``ARPABET``, vowels keep the stress digit produced by the G2P
    backend ('AH0', 'AH1', 'AH2', ...), so each vowel has three symbols.
    """
    # NOTE: the order of this list fixes the id of every symbol in the vocab.
    phonemes = [
        'AA0', 'AA1', 'AA2', 'AE0', 'AE1', 'AE2', 'AH0', 'AH1', 'AH2', 'AO0',
        'AO1', 'AO2', 'AW0', 'AW1', 'AW2', 'AY0', 'AY1', 'AY2', 'B', 'CH', 'D',
        'DH', 'EH0', 'EH1', 'EH2', 'ER0', 'ER1', 'ER2', 'EY0', 'EY1', 'EY2',
        'F', 'G', 'HH', 'IH0', 'IH1', 'IH2', 'IY0', 'IY1', 'IY2', 'JH', 'K',
        'L', 'M', 'N', 'NG', 'OW0', 'OW1', 'OW2', 'OY0', 'OY1', 'OY2', 'P',
        'R', 'S', 'SH', 'T', 'TH', 'UH0', 'UH1', 'UH2', 'UW0', 'UW1', 'UW2',
        'V', 'W', 'Y', 'Z', 'ZH'
    ]
    punctuations = [',', '.', '?', '!']
    symbols = phonemes + punctuations

    def __init__(self):
        self.backend = G2p()
        self.vocab = Vocab(self.phonemes + self.punctuations)

    def phoneticize(self, sentence, add_start_end=False):
        """ Normalize the input text sequence and convert it into
        pronunciation sequence.

        Parameters
        -----------
        sentence: str
            The input text sequence.
        add_start_end: bool
            Whether to wrap the sequence with the vocab's start and end
            symbols. Defaults to False.

        Returns
        ----------
        List[str]
            The list of pronunciation sequence. Symbols that are not in the
            vocabulary are dropped.
        """
        phonemes = self.backend(sentence)
        if add_start_end:
            start = self.vocab.start_symbol
            end = self.vocab.end_symbol
            phonemes = [start] + phonemes + [end]
        # Filter last so that anything the backend emits outside the
        # vocabulary never reaches numericalize().
        phonemes = [item for item in phonemes if item in self.vocab.stoi]
        return phonemes

    def numericalize(self, phonemes):
        """ Convert pronunciation sequence into pronunciation id sequence.

        Parameters
        -----------
        phonemes: List[str]
            The list of pronunciation sequence.

        Returns
        ----------
        List[int]
            The list of pronunciation id sequence.
        """
        return [self.vocab.lookup(item) for item in phonemes]

    def reverse(self, ids):
        """ Reverse the list of pronunciation id sequence to a list of
        pronunciation sequence.

        Parameters
        -----------
        ids: List[int]
            The list of pronunciation id sequence.

        Returns
        ----------
        List[str]
            The list of pronunciation sequence.
        """
        return [self.vocab.reverse(i) for i in ids]

    def __call__(self, sentence, add_start_end=False):
        """ Convert the input text sequence into pronunciation id sequence.

        Parameters
        -----------
        sentence: str
            The input text sequence.
        add_start_end: bool
            Whether to wrap the sequence with the vocab's start and end
            symbols. Defaults to False.

        Returns
        ----------
        List[int]
            The list of pronunciation id sequence.
        """
        return self.numericalize(
            self.phoneticize(sentence, add_start_end=add_start_end))

    @property
    def vocab_size(self):
        """ Vocab size.
        """
        # 77 = 69 phones + 4 punctuations + 4 special tokens
        return len(self.vocab)


# =============================================================================
# parakeet/frontend/pinyin.py
# =============================================================================
# NOTE: in the standalone module the string below is the module docstring.
"""
A Simple Chinese Phonology using pinyin symbols.
The G2P conversion converts pinyin string to symbols. Also it can handle string
in Chinese characters, but due to the complexity of chinese G2P, we can leave
text -> pinyin to other part of a TTS system. Other NLP techniques may be used
(e.g. tokenization, tagging, NER...)
"""
import re
from itertools import product

import pypinyin
from pypinyin.contrib.neutral_tone import NeutralToneWith5Mixin
from pypinyin.core import DefaultConverter
from pypinyin.core import Pinyin, Style

from parakeet.frontend.phonectic import Phonetics
from parakeet.frontend.vocab import Vocab

# Full-width punctuation marks that are kept as symbols of their own.
_punctuations = [',', '。', '?', '!']
_initials = [
    'b', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k', 'h', 'j', 'q', 'x', 'zh',
    'ch', 'sh', 'r', 'z', 'c', 's'
]
_finals = [
    'ii', 'iii', 'a', 'o', 'e', 'ea', 'ai', 'ei', 'ao', 'ou', 'an', 'en',
    'ang', 'eng', 'er', 'i', 'ia', 'io', 'ie', 'iai', 'iao', 'iou', 'ian',
    'ien', 'iang', 'ieng', 'u', 'ua', 'uo', 'uai', 'uei', 'uan', 'uen', 'uang',
    'ueng', 'v', 've', 'van', 'ven', 'veng'
]
_ernized_symbol = ['&r']
_phones = _initials + _finals + _ernized_symbol + _punctuations
# '0' is the tone assigned to symbols that carry no tone of their own
# (initials and punctuations); '1'..'5' are the tones of the finals.
_tones = ['0', '1', '2', '3', '4', '5']

# Every final combined with every real tone, e.g. 'a1' ... 'veng5'.
_toned_finals = [final + tone for final, tone in product(_finals, _tones[1:])]
_toned_phonems = _initials + _toned_finals + _ernized_symbol + _punctuations


class ParakeetConverter(NeutralToneWith5Mixin, DefaultConverter):
    """pypinyin converter that uses the ``NeutralToneWith5Mixin`` on top of
    the default converter."""
    pass


class ParakeetPinyin(Phonetics):
    """Pinyin phonology that yields two parallel sequences: phonemes (initials,
    finals, punctuations) and their tones."""

    def __init__(self):
        self.vocab_phonemes = Vocab(_phones)
        self.vocab_tones = Vocab(_tones)
        self.pinyin_backend = Pinyin(ParakeetConverter())

    def convert_pypinyin_tone3(self, syllables, add_start_end=False):
        """ Convert pypinyin TONE3 syllables into phoneme and tone sequences.

        Parameters
        -----------
        syllables: List[str]
            Syllables in pypinyin's ``Style.TONE3`` form (tone digit last).
        add_start_end: bool
            Whether to wrap both sequences with the start and end symbols of
            their respective vocab. Defaults to False.

        Returns
        ----------
        Tuple[List[str], List[str]]
            The phoneme sequence and the tone sequence. Symbols that are not
            in the corresponding vocabulary are dropped.
        """
        phonemes, tones = _convert_to_parakeet_style_pinyin(syllables)

        if add_start_end:
            start = self.vocab_phonemes.start_symbol
            end = self.vocab_phonemes.end_symbol
            phonemes = [start] + phonemes + [end]

            # Wrap the tone sequence too, so both sequences stay parallel.
            # (The original assigned this result to `phonemes`, clobbering it.)
            start = self.vocab_tones.start_symbol
            end = self.vocab_tones.end_symbol
            tones = [start] + tones + [end]

        phonemes = [
            item for item in phonemes if item in self.vocab_phonemes.stoi
        ]
        tones = [item for item in tones if item in self.vocab_tones.stoi]
        return phonemes, tones

    def phoneticize(self, sentence, add_start_end=False):
        """ Normalize the input text sequence and convert it into
        pronunciation sequence.

        Parameters
        -----------
        sentence: str
            The input text sequence.
        add_start_end: bool
            Whether to add start and end symbols. Defaults to False.

        Returns
        ----------
        Tuple[List[str], List[str]]
            The list of pronunciation sequence and the list of tones.
        """
        syllables = self.pinyin_backend.lazy_pinyin(
            sentence, style=Style.TONE3, strict=True)
        phonemes, tones = self.convert_pypinyin_tone3(
            syllables, add_start_end=add_start_end)
        return phonemes, tones

    def numericalize(self, phonemes, tones):
        """ Convert pronunciation sequence into pronunciation id sequence.

        Parameters
        -----------
        phonemes: List[str]
            The list of pronunciation sequence.
        tones: List[str]
            The list of tones.

        Returns
        ----------
        Tuple[List[int], List[int]]
            The list of pronunciation id sequence and the list of tone id
            sequence.
        """
        phoneme_ids = [self.vocab_phonemes.lookup(item) for item in phonemes]
        tone_ids = [self.vocab_tones.lookup(item) for item in tones]
        return phoneme_ids, tone_ids

    def __call__(self, sentence, add_start_end=False):
        """ Convert the input text sequence into pronunciation id sequence.

        Parameters
        -----------
        sentence: str
            The input text sequence.
        add_start_end: bool
            Whether to add start and end symbols. Defaults to False.

        Returns
        ----------
        Tuple[List[int], List[int]]
            The list of pronunciation id sequence and the list of tone id
            sequence.
        """
        phonemes, tones = self.phoneticize(
            sentence, add_start_end=add_start_end)
        phoneme_ids, tone_ids = self.numericalize(phonemes, tones)
        return phoneme_ids, tone_ids

    @property
    def vocab_size(self):
        """ Vocab size.
        """
        # 70 = 62 phones + 4 punctuations + 4 special tokens
        return len(self.vocab_phonemes)

    @property
    def tone_vocab_size(self):
        """ Tone vocab size.
        """
        # 10 = 1 non tone + 5 tone + 4 special tokens
        return len(self.vocab_tones)


class ParakeetPinyinWithTone(Phonetics):
    """Pinyin phonology with a single sequence in which every final carries
    its tone digit (e.g. 'ueng1')."""

    def __init__(self):
        self.vocab = Vocab(_toned_phonems)
        self.pinyin_backend = Pinyin(ParakeetConverter())

    def convert_pypinyin_tone3(self, syllables, add_start_end=False):
        """ Convert pypinyin TONE3 syllables into a toned phoneme sequence.

        Parameters
        -----------
        syllables: List[str]
            Syllables in pypinyin's ``Style.TONE3`` form (tone digit last).
        add_start_end: bool
            Whether to wrap the sequence with the vocab's start and end
            symbols. Defaults to False.

        Returns
        ----------
        List[str]
            The phoneme sequence. Symbols that are not in the vocabulary are
            dropped.
        """
        phonemes = _convert_to_parakeet_style_pinyin_with_tone(syllables)

        if add_start_end:
            # This class only owns `self.vocab`; the original looked the
            # symbols up on a non-existent `self.vocab_phonemes`.
            start = self.vocab.start_symbol
            end = self.vocab.end_symbol
            phonemes = [start] + phonemes + [end]

        phonemes = [item for item in phonemes if item in self.vocab.stoi]
        return phonemes

    def phoneticize(self, sentence, add_start_end=False):
        """ Normalize the input text sequence and convert it into
        pronunciation sequence.

        Parameters
        -----------
        sentence: str
            The input text sequence.
        add_start_end: bool
            Whether to add start and end symbols. Defaults to False.

        Returns
        ----------
        List[str]
            The list of pronunciation sequence.
        """
        syllables = self.pinyin_backend.lazy_pinyin(
            sentence, style=Style.TONE3, strict=True)
        phonemes = self.convert_pypinyin_tone3(
            syllables, add_start_end=add_start_end)
        return phonemes

    def numericalize(self, phonemes):
        """ Convert pronunciation sequence into pronunciation id sequence.

        Parameters
        -----------
        phonemes: List[str]
            The list of pronunciation sequence.

        Returns
        ----------
        List[int]
            The list of pronunciation id sequence.
        """
        phoneme_ids = [self.vocab.lookup(item) for item in phonemes]
        return phoneme_ids

    def __call__(self, sentence, add_start_end=False):
        """ Convert the input text sequence into pronunciation id sequence.

        Parameters
        -----------
        sentence: str
            The input text sequence.
        add_start_end: bool
            Whether to add start and end symbols. Defaults to False.

        Returns
        ----------
        List[int]
            The list of pronunciation id sequence.
        """
        phonemes = self.phoneticize(sentence, add_start_end=add_start_end)
        phoneme_ids = self.numericalize(phonemes)
        return phoneme_ids

    @property
    def vocab_size(self):
        """ Vocab size.
        """
        # 230 = 222 phones + 4 punctuations + 4 special tokens
        return len(self.vocab)


def _convert_to_parakeet_convension(syllable):
    """Rewrite one pypinyin ``Style.TONE3`` syllable in parakeet convention.

    The last character is treated as the tone and re-attached unchanged; the
    rest is expanded so that it is a plain concatenation of an optional
    initial and one of ``_finals`` (e.g. 'zhong1' -> 'zhueng1').

    NOTE: the replacement rules below are order dependent; do not reorder.
    NOTE(review): a token that does not end with a tone digit still loses its
    last character to `tone` here - confirm callers only pass TONE3 syllables.
    """
    # from pypinyin.Style.TONE3 to parakeet convention
    # Slices instead of indexing, so an empty token does not raise.
    tone = syllable[-1:]
    syllable = syllable[:-1]

    # expansion of o -> uo
    syllable = re.sub(r"([bpmf])o$", r"\1uo", syllable)

    # expansion for iong, ong
    syllable = syllable.replace("iong", "veng").replace("ong", "ueng")

    # expansion for ing, in
    syllable = syllable.replace("ing", "ieng").replace("in", "ien")

    # expansion for un, ui, iu
    syllable = syllable.replace("un", "uen")\
        .replace("ui", "uei")\
        .replace("iu", "iou")

    # rule for variants of i
    syllable = syllable.replace("zi", "zii")\
        .replace("ci", "cii")\
        .replace("si", "sii")\
        .replace("zhi", "zhiii")\
        .replace("chi", "chiii")\
        .replace("shi", "shiii")\
        .replace("ri", "riii")

    # rule for y preceding i, u
    syllable = syllable.replace("yi", "i").replace("yu", "v").replace("y", "i")

    # rule for w
    syllable = syllable.replace("wu", "u").replace("w", "u")

    # rule for v following j, q, x
    syllable = syllable.replace("ju", "jv")\
        .replace("qu", "qv")\
        .replace("xu", "xv")

    return syllable + tone


def _split_initial_final(syllable):
    """Split a converted syllable into ``(initial, rest)``.

    Two-letter initials ('zh', 'ch', 'sh') are tried before one-letter ones.
    ``initial`` is None when the syllable does not start with an initial.
    """
    if syllable[:2] in _initials:
        return syllable[:2], syllable[2:]
    # `[:1]` rather than `[0]`, so an empty string falls through safely.
    if syllable[:1] in _initials:
        return syllable[:1], syllable[1:]
    return None, syllable


def _split_syllable(syllable: str):
    """Split one TONE3 syllable into parallel phone and tone lists.

    A punctuation mark gives ``([mark], ['0'])``. Otherwise the initial (if
    any) gets tone '0' and the final gets the syllable's tone.
    """
    if syllable in _punctuations:
        # syllables, tones
        return [syllable], ['0']

    syllable = _convert_to_parakeet_convension(syllable)

    tone = syllable[-1:]
    initial, final = _split_initial_final(syllable[:-1])

    if initial is None:
        return [final], [tone]
    return [initial, final], ['0', tone]


def _convert_to_parakeet_style_pinyin(syllables):
    """Convert TONE3 syllables into flat, parallel phone and tone lists."""
    phones, tones = [], []
    for syllable in syllables:
        p, t = _split_syllable(syllable)
        phones.extend(p)
        tones.extend(t)
    return phones, tones


def _split_syllable_with_tone(syllable: str):
    """Split one TONE3 syllable into phones, keeping the tone on the final.

    A punctuation mark gives ``[mark]``; otherwise the result is
    ``[initial, final + tone]`` or ``[final + tone]``.
    """
    if syllable in _punctuations:
        # syllables
        return [syllable]

    syllable = _convert_to_parakeet_convension(syllable)

    initial, toned_final = _split_initial_final(syllable)
    if initial is None:
        return [toned_final]
    return [initial, toned_final]


def _convert_to_parakeet_style_pinyin_with_tone(syllables):
    """Convert TONE3 syllables into one flat list of toned phones."""
    phones = []
    for syllable in syllables:
        phones.extend(_split_syllable_with_tone(syllable))
    return phones