ParakeetRebeccaRosario/examples/fastspeech2/baker/frontend.py

77 lines
2.8 KiB
Python
Raw Normal View History

2021-07-22 18:31:34 +08:00
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
import numpy as np
import paddle
2021-08-02 14:28:25 +08:00
from parakeet.frontend.cn_frontend import Frontend as cnFrontend
2021-07-22 18:31:34 +08:00
class Frontend():
    """Turn a sentence into phone (and optionally tone) id tensors.

    Phonemes are produced by the wrapped ``cnFrontend`` and then mapped to
    integer ids through vocabularies loaded from whitespace-separated
    ``<symbol> <id>`` text files.
    """

    # Splits a trailing tone digit (0-5) off a phoneme; compiled once here
    # instead of being looked up for every phoneme.
    _TONE_PATTERN = re.compile(r'^(\w+)([012345])$')

    def __init__(self, phone_vocab_path=None, tone_vocab_path=None):
        """Create the text frontend and load the optional vocabularies.

        Args:
            phone_vocab_path: Path to the phone vocabulary file. When falsy,
                ``vocab_phones`` stays empty.
            tone_vocab_path: Path to the tone vocabulary file. When falsy,
                ``vocab_tones`` stays empty.

        Raises:
            ValueError: If a non-blank vocabulary line does not consist of
                exactly two fields, or its id is not an integer.
        """
        self.frontend = cnFrontend()
        self.vocab_phones = {}
        self.vocab_tones = {}
        if phone_vocab_path:
            self.vocab_phones = self._load_vocab(phone_vocab_path)
        if tone_vocab_path:
            self.vocab_tones = self._load_vocab(tone_vocab_path)

    @staticmethod
    def _load_vocab(vocab_path):
        """Read a ``<symbol> <id>`` file into a ``{symbol: int(id)}`` dict."""
        vocab = {}
        # Explicit encoding keeps decoding independent of the platform locale.
        with open(vocab_path, 'rt', encoding='utf-8') as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing empty line).
                    continue
                # Any other field count is malformed and raises ValueError.
                symbol, index = fields
                vocab[symbol] = int(index)
        return vocab

    def _p2id(self, phonemes):
        """Map phonemes to an ``np.int64`` array of phone ids.

        Phonemes missing from ``vocab_phones`` are replaced by ``"sp"``.

        Raises:
            KeyError: If a replacement is needed but ``"sp"`` is not in
                ``vocab_phones``.
        """
        vocab = self.vocab_phones
        # replace unk phone with sp
        phone_ids = [vocab[phn if phn in vocab else "sp"] for phn in phonemes]
        return np.array(phone_ids, np.int64)

    def _t2id(self, tones):
        """Map tones to an ``np.int64`` array of tone ids.

        Tones missing from ``vocab_tones`` are replaced by ``"0"``.

        Raises:
            KeyError: If a replacement is needed but ``"0"`` is not in
                ``vocab_tones``.
        """
        vocab = self.vocab_tones
        # replace unk tone with 0
        tone_ids = [vocab[tone if tone in vocab else "0"] for tone in tones]
        return np.array(tone_ids, np.int64)

    def get_input_ids(self, sentence, get_tone_ids=False):
        """Convert a sentence into model input id tensors.

        Args:
            sentence: Text passed to the wrapped frontend's ``get_phonemes``.
            get_tone_ids: When true and a tone vocabulary is loaded, the
                trailing tone digit is split off each phoneme and returned
                separately as ``"tone_ids"``.

        Returns:
            A dict with ``"phone_ids"`` and, when tones were requested and a
            tone vocabulary is available, ``"tone_ids"``; both values are
            created with ``paddle.to_tensor``.
        """
        phonemes = self.frontend.get_phonemes(sentence)
        result = {}
        if get_tone_ids and self.vocab_tones:
            phones = []
            tones = []
            for full_phone in phonemes:
                # split tone from finals
                match = self._TONE_PATTERN.match(full_phone)
                if match:
                    phones.append(match.group(1))
                    tones.append(match.group(2))
                else:
                    # No trailing tone digit: keep the symbol, use tone '0'.
                    phones.append(full_phone)
                    tones.append('0')
            result["tone_ids"] = paddle.to_tensor(self._t2id(tones))
        else:
            # Without a tone vocabulary the phonemes are looked up unsplit.
            phones = phonemes
        result["phone_ids"] = paddle.to_tensor(self._p2id(phones))
        return result