Merge pull request #135 from yt605155624/fastspeech2_format

format code and add typehint for tone_sandhi
This commit is contained in:
Hui Zhang 2021-08-04 05:41:40 -05:00 committed by GitHub
commit 3ac2e01263
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
22 changed files with 265 additions and 234 deletions

View File

@ -9,7 +9,7 @@ Download BZNSYP from its [Official Website](https://test.data-baker.com/data/in
### Get MFA result of BZNSYP and Extract it. ### Get MFA result of BZNSYP and Extract it.
We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get durations for fastspeech2. We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get durations for fastspeech2.
You can download it from here: [baker_alignmenti_tone.tar.gz](https://paddlespeech.bj.bcebos.com/MFA/BZNSYP/with_tone/baker_alignmenti_tone.tar.gz), or train your own MFA model by referring to the [use_mfa example](https://github.com/PaddlePaddle/Parakeet/tree/develop/examples/use_mfa) in our repo. You can download it from here: [baker_alignment_tone.tar.gz](https://paddlespeech.bj.bcebos.com/MFA/BZNSYP/with_tone/baker_alignment_tone.tar.gz), or train your own MFA model by referring to the [use_mfa example](https://github.com/PaddlePaddle/Parakeet/tree/develop/examples/use_mfa) in our repo.
### Preprocess the dataset. ### Preprocess the dataset.
@ -26,9 +26,9 @@ Run the command below to preprocess the dataset.
``` ```
## Synthesize ## Synthesize
We use [parallel wavegan](https://github.com/PaddlePaddle/Parakeet/tree/develop/examples/parallelwave_gan/baker) as the neural vocoder. We use [parallel wavegan](https://github.com/PaddlePaddle/Parakeet/tree/develop/examples/parallelwave_gan/baker) as the neural vocoder.
Download pretrained parallel wavegan model from [parallel_wavegan_baker_ckpt_1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/parallel_wavegan_baker_ckpt_1.0.zip) and unzip it. Download pretrained parallel wavegan model from [parallel_wavegan_baker_ckpt_0.4.zip](https://paddlespeech.bj.bcebos.com/Parakeet/parallel_wavegan_baker_ckpt_0.4.zip) and unzip it.
```bash ```bash
unzip parallel_wavegan_baker_ckpt_1.0.zip unzip parallel_wavegan_baker_ckpt_0.4.zip
``` ```
`synthesize.sh` can synthesize waveform from `metadata.jsonl`. `synthesize.sh` can synthesize waveform from `metadata.jsonl`.
`synthesize_e2e.sh` can synthesize waveform from text list. `synthesize_e2e.sh` can synthesize waveform from text list.
@ -44,19 +44,19 @@ or
You can see the bash files for more details of input parameters. You can see the bash files for more details of input parameters.
## Pretrained Model ## Pretrained Model
A pretrained model trained without silence at the edges of the audio can be downloaded here: [fastspeech2_nosil_baker_ckpt_1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/fastspeech2_nosil_baker_ckpt_1.0.zip) A pretrained model trained without silence at the edges of the audio can be downloaded here: [fastspeech2_nosil_baker_ckpt_0.4.zip](https://paddlespeech.bj.bcebos.com/Parakeet/fastspeech2_nosil_baker_ckpt_0.4.zip)
Then, you can use the following scripts to synthesize for `sentences.txt` using pretrained fastspeech2 model. Then, you can use the following scripts to synthesize for `sentences.txt` using pretrained fastspeech2 model.
```bash ```bash
python3 synthesize_e2e.py \ python3 synthesize_e2e.py \
--fastspeech2-config=fastspeech2_nosil_baker_ckpt_1.0/default.yaml \ --fastspeech2-config=fastspeech2_nosil_baker_ckpt_0.4/default.yaml \
--fastspeech2-checkpoint=fastspeech2_nosil_baker_ckpt_1.0/snapshot_iter_76000.pdz \ --fastspeech2-checkpoint=fastspeech2_nosil_baker_ckpt_0.4/snapshot_iter_76000.pdz \
--fastspeech2-stat=fastspeech2_nosil_baker_ckpt_1.0/speech_stats.npy \ --fastspeech2-stat=fastspeech2_nosil_baker_ckpt_0.4/speech_stats.npy \
--pwg-config=parallel_wavegan_baker_ckpt_1.0/pwg_default.yaml \ --pwg-config=parallel_wavegan_baker_ckpt_0.4/pwg_default.yaml \
--pwg-params=parallel_wavegan_baker_ckpt_1.0/pwg_generator.pdparams \ --pwg-params=parallel_wavegan_baker_ckpt_0.4/pwg_generator.pdparams \
--pwg-stat=parallel_wavegan_baker_ckpt_1.0/pwg_stats.npy \ --pwg-stat=parallel_wavegan_baker_ckpt_0.4/pwg_stats.npy \
--text=sentences.txt \ --text=sentences.txt \
--output-dir=exp/debug/test_e2e \ --output-dir=exp/debug/test_e2e \
--device="gpu" \ --device="gpu" \
--phones=fastspeech2_nosil_baker_ckpt_1.0/phone_id_map.txt --phones-dict=fastspeech2_nosil_baker_ckpt_0.4/phone_id_map.txt
``` ```

View File

@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
import numpy as np import numpy as np
import paddle
from parakeet.data.batch import batch_sequences from parakeet.data.batch import batch_sequences
@ -35,6 +36,15 @@ def collate_baker_examples(examples):
durations = batch_sequences(durations) durations = batch_sequences(durations)
energy = batch_sequences(energy) energy = batch_sequences(energy)
# convert each batch to paddle.Tensor
text = paddle.to_tensor(text)
pitch = paddle.to_tensor(pitch)
speech = paddle.to_tensor(speech)
durations = paddle.to_tensor(durations)
energy = paddle.to_tensor(energy)
text_lengths = paddle.to_tensor(text_lengths)
speech_lengths = paddle.to_tensor(speech_lengths)
batch = { batch = {
"text": text, "text": text,
"text_lengths": text_lengths, "text_lengths": text_lengths,

View File

@ -44,9 +44,7 @@ class Frontend():
def _t2id(self, tones): def _t2id(self, tones):
# replace unknown tones with "0" # replace unknown tones with "0"
tones = [ tones = [tone if tone in self.vocab_tones else "0" for tone in tones]
tone if tone in self.vocab_tones else "0" for tone in tones
]
tone_ids = [self.vocab_tones[item] for item in tones] tone_ids = [self.vocab_tones[item] for item in tones]
return np.array(tone_ids, np.int64) return np.array(tone_ids, np.int64)

View File

@ -88,12 +88,7 @@ class LogMelFBank():
class Pitch(): class Pitch():
def __init__(self, def __init__(self, sr=24000, hop_length=300, f0min=80, f0max=7600):
sr=24000,
hop_length=300,
f0min=80,
f0max=7600
):
self.sr = sr self.sr = sr
self.hop_length = hop_length self.hop_length = hop_length
@ -241,17 +236,16 @@ if __name__ == "__main__":
print(mel) print(mel)
print(mel.shape) print(mel.shape)
pitch_extractor = Pitch(sr=C.fs, pitch_extractor = Pitch(
hop_length=C.n_shift, sr=C.fs, hop_length=C.n_shift, f0min=C.f0min, f0max=C.f0max)
f0min=C.f0min,
f0max=C.f0max)
duration = "2 8 8 8 12 11 10 13 11 10 18 9 12 10 12 11 5" duration = "2 8 8 8 12 11 10 13 11 10 18 9 12 10 12 11 5"
duration = np.array([int(x) for x in duration.split(" ")]) duration = np.array([int(x) for x in duration.split(" ")])
avg_f0 = pitch_extractor.get_pitch(wav, duration=duration) avg_f0 = pitch_extractor.get_pitch(wav, duration=duration)
print(avg_f0) print(avg_f0)
print(avg_f0.shape) print(avg_f0.shape)
energy_extractor = Energy(sr=C.fs, energy_extractor = Energy(
sr=C.fs,
n_fft=C.n_fft, n_fft=C.n_fft,
hop_length=C.n_shift, hop_length=C.n_shift,
win_length=C.win_length, win_length=C.win_length,

View File

@ -139,8 +139,7 @@ def compare_duration_and_mel_length(sentences, utt, mel):
sentences.pop(utt) sentences.pop(utt)
def process_sentence( def process_sentence(config: Dict[str, Any],
config: Dict[str, Any],
fp: Path, fp: Path,
sentences: Dict, sentences: Dict,
output_dir: Path, output_dir: Path,
@ -160,7 +159,8 @@ def process_sentence(
durations = sentences[utt_id][1] durations = sentences[utt_id][1]
d_cumsum = np.pad(np.array(durations).cumsum(0), (1, 0), 'constant') d_cumsum = np.pad(np.array(durations).cumsum(0), (1, 0), 'constant')
# slightly less precise than using *.TextGrid directly # slightly less precise than using *.TextGrid directly
times = librosa.frames_to_time(d_cumsum, sr=config.fs, hop_length=config.n_shift) times = librosa.frames_to_time(
d_cumsum, sr=config.fs, hop_length=config.n_shift)
if cut_sil: if cut_sil:
start = 0 start = 0
end = d_cumsum[-1] end = d_cumsum[-1]
@ -239,7 +239,8 @@ def process_sentences(config,
for fp in fps: for fp in fps:
future = pool.submit(process_sentence, config, fp, future = pool.submit(process_sentence, config, fp,
sentences, output_dir, mel_extractor, sentences, output_dir, mel_extractor,
pitch_extractor, energy_extractor, cut_sil) pitch_extractor, energy_extractor,
cut_sil)
future.add_done_callback(lambda p: progress.update()) future.add_done_callback(lambda p: progress.update())
futures.append(future) futures.append(future)
@ -289,7 +290,10 @@ def main():
return True if str.lower() == 'true' else False return True if str.lower() == 'true' else False
parser.add_argument( parser.add_argument(
"--cut-sil", type=str2bool, default=True, help="whether cut sil in the edge of audio") "--cut-sil",
type=str2bool,
default=True,
help="whether cut sil in the edge of audio")
args = parser.parse_args() args = parser.parse_args()
C = get_cfg_default() C = get_cfg_default()
@ -336,11 +340,10 @@ def main():
n_mels=C.n_mels, n_mels=C.n_mels,
fmin=C.fmin, fmin=C.fmin,
fmax=C.fmax) fmax=C.fmax)
pitch_extractor = Pitch(sr=C.fs, pitch_extractor = Pitch(
hop_length=C.n_shift, sr=C.fs, hop_length=C.n_shift, f0min=C.f0min, f0max=C.f0max)
f0min=C.f0min, energy_extractor = Energy(
f0max=C.f0max) sr=C.fs,
energy_extractor = Energy(sr=C.fs,
n_fft=C.n_fft, n_fft=C.n_fft,
hop_length=C.n_shift, hop_length=C.n_shift,
win_length=C.win_length, win_length=C.win_length,

View File

@ -3,11 +3,11 @@
python3 synthesize.py \ python3 synthesize.py \
--fastspeech2-config=conf/default.yaml \ --fastspeech2-config=conf/default.yaml \
--fastspeech2-checkpoint=exp/default/checkpoints/snapshot_iter_62577.pdz \ --fastspeech2-checkpoint=exp/default/checkpoints/snapshot_iter_153.pdz \
--fastspeech2-stat=dump/train/speech_stats.npy \ --fastspeech2-stat=dump/train/speech_stats.npy \
--pwg-config=parallel_wavegan_baker_ckpt_1.0/pwg_default.yaml \ --pwg-config=parallel_wavegan_baker_ckpt_0.4/pwg_default.yaml \
--pwg-params=parallel_wavegan_baker_ckpt_1.0/pwg_generator.pdparams \ --pwg-params=parallel_wavegan_baker_ckpt_0.4/pwg_generator.pdparams \
--pwg-stat=parallel_wavegan_baker_ckpt_1.0/pwg_stats.npy \ --pwg-stat=parallel_wavegan_baker_ckpt_0.4/pwg_stats.npy \
--test-metadata=dump/test/norm/metadata.jsonl \ --test-metadata=dump/test/norm/metadata.jsonl \
--output-dir=exp/debug/test \ --output-dir=exp/debug/test \
--device="gpu" \ --device="gpu" \

View File

@ -3,11 +3,11 @@
python3 synthesize_e2e.py \ python3 synthesize_e2e.py \
--fastspeech2-config=conf/default.yaml \ --fastspeech2-config=conf/default.yaml \
--fastspeech2-checkpoint=exp/default/checkpoints/snapshot_iter_136017.pdz \ --fastspeech2-checkpoint=exp/default/checkpoints/snapshot_iter_153.pdz \
--fastspeech2-stat=dump/train/speech_stats.npy \ --fastspeech2-stat=dump/train/speech_stats.npy \
--pwg-config=parallel_wavegan_baker_ckpt_1.0/pwg_default.yaml \ --pwg-config=parallel_wavegan_baker_ckpt_0.4/pwg_default.yaml \
--pwg-params=parallel_wavegan_baker_ckpt_1.0/pwg_generator.pdparams \ --pwg-params=parallel_wavegan_baker_ckpt_0.4/pwg_generator.pdparams \
--pwg-stat=parallel_wavegan_baker_ckpt_1.0/pwg_stats.npy \ --pwg-stat=parallel_wavegan_baker_ckpt_0.4/pwg_stats.npy \
--text=sentences.txt \ --text=sentences.txt \
--output-dir=exp/debug/test_e2e \ --output-dir=exp/debug/test_e2e \
--device="gpu" \ --device="gpu" \

View File

@ -97,7 +97,7 @@ def process_sentence(config: Dict[str, Any],
utt_id = fp.stem utt_id = fp.stem
# reading # reading
y, sr = librosa.load(fp, sr=config.sr) # resampling may occur y, sr = librosa.load(str(fp), sr=config.sr) # resampling may occur
assert len(y.shape) == 1, f"{utt_id} is not a mono-channel audio." assert len(y.shape) == 1, f"{utt_id} is not a mono-channel audio."
assert np.abs(y).max( assert np.abs(y).max(
) <= 1.0, f"{utt_id} is seems to be different that 16 bit PCM." ) <= 1.0, f"{utt_id} is seems to be different that 16 bit PCM."

View File

@ -1,6 +1,5 @@
FLAGS_cudnn_exhaustive_search=true \ FLAGS_cudnn_exhaustive_search=true \
FLAGS_conv_workspace_size_limit=4000 \ FLAGS_conv_workspace_size_limit=4000 \
python train.py \ python train.py \
--train-metadata=dump/train/norm/metadata.jsonl \ --train-metadata=dump/train/norm/metadata.jsonl \
--dev-metadata=dump/dev/norm/metadata.jsonl \ --dev-metadata=dump/dev/norm/metadata.jsonl \

View File

@ -19,4 +19,3 @@ from parakeet.frontend.normalizer import *
from parakeet.frontend.cn_normalization import * from parakeet.frontend.cn_normalization import *
from parakeet.frontend.tone_sandhi import * from parakeet.frontend.tone_sandhi import *
from parakeet.frontend.generate_lexicon import * from parakeet.frontend.generate_lexicon import *

View File

@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import jieba.posseg as psg import jieba.posseg as psg
import numpy as np import numpy as np
import paddle import paddle
@ -34,7 +33,8 @@ class Frontend():
self.g2p_model = g2p_model self.g2p_model = g2p_model
if self.g2p_model == "g2pM": if self.g2p_model == "g2pM":
self.g2pM_model = G2pM() self.g2pM_model = G2pM()
self.pinyin2phone = generate_lexicon(with_tone=True, with_erhua=False) self.pinyin2phone = generate_lexicon(
with_tone=True, with_erhua=False)
def _get_initials_finals(self, word): def _get_initials_finals(self, word):
initials = [] initials = []
@ -84,7 +84,8 @@ class Frontend():
if pos == 'eng': if pos == 'eng':
continue continue
sub_initials, sub_finals = self._get_initials_finals(word) sub_initials, sub_finals = self._get_initials_finals(word)
sub_finals = self.tone_modifier.modified_tone(word, pos, sub_finals) sub_finals = self.tone_modifier.modified_tone(word, pos,
sub_finals)
initials.append(sub_initials) initials.append(sub_initials)
finals.append(sub_finals) finals.append(sub_finals)
# assert len(sub_initials) == len(sub_finals) == len(word) # assert len(sub_initials) == len(sub_finals) == len(word)

View File

@ -1,4 +1,4 @@
supported NSW (Non-Standard-Word) Normalization ## Supported NSW (Non-Standard-Word) Normalization
|NSW type|raw|normalized| |NSW type|raw|normalized|
|-|-|-| |-|-|-|
@ -9,3 +9,5 @@ supported NSW (Non-Standard-Word) Normalization
|money|随便来几个价格12块534.5元20.1万|随便来几个价格十二块五 三十四点五元 二十点一万| |money|随便来几个价格12块534.5元20.1万|随便来几个价格十二块五 三十四点五元 二十点一万|
|percentage|明天有62%的概率降雨|明天有百分之六十二的概率降雨| |percentage|明天有62%的概率降雨|明天有百分之六十二的概率降雨|
|telephone|这是固话0421-33441122<br>这是手机+86 18544139121|这是固话零四二一三三四四一一二二<br>这是手机八六一八五四四一三九一二一| |telephone|这是固话0421-33441122<br>这是手机+86 18544139121|这是固话零四二一三三四四一一二二<br>这是手机八六一八五四四一三九一二一|
## References
[Pull requests #658 of DeepSpeech](https://github.com/PaddlePaddle/DeepSpeech/pull/658/files)

View File

@ -24,12 +24,13 @@ def _time_num2str(num_string: str) -> str:
result = DIGITS['0'] + result result = DIGITS['0'] + result
return result return result
# 时刻表达式 # 时刻表达式
RE_TIME = re.compile( RE_TIME = re.compile(r'([0-1]?[0-9]|2[0-3])'
r'([0-1]?[0-9]|2[0-3])'
r':([0-5][0-9])' r':([0-5][0-9])'
r'(:([0-5][0-9]))?' r'(:([0-5][0-9]))?')
)
def replace_time(match: re.Match) -> str: def replace_time(match: re.Match) -> str:
hour = match.group(1) hour = match.group(1)
minute = match.group(2) minute = match.group(2)
@ -43,11 +44,11 @@ def replace_time(match: re.Match) -> str:
return result return result
RE_DATE = re.compile( RE_DATE = re.compile(r'(\d{4}|\d{2})年'
r'(\d{4}|\d{2})年'
r'((0?[1-9]|1[0-2])月)?' r'((0?[1-9]|1[0-2])月)?'
r'(((0?[1-9])|((1|2)[0-9])|30|31)([日号]))?' r'(((0?[1-9])|((1|2)[0-9])|30|31)([日号]))?')
)
def replace_date(match: re.Match) -> str: def replace_date(match: re.Match) -> str:
year = match.group(1) year = match.group(1)
month = match.group(3) month = match.group(3)
@ -61,10 +62,12 @@ def replace_date(match: re.Match) -> str:
result += f"{verbalize_cardinal(day)}{match.group(9)}" result += f"{verbalize_cardinal(day)}{match.group(9)}"
return result return result
# 用 / 或者 - 分隔的 YY/MM/DD 或者 YY-MM-DD 日期 # 用 / 或者 - 分隔的 YY/MM/DD 或者 YY-MM-DD 日期
RE_DATE2 = re.compile( RE_DATE2 = re.compile(
r'(\d{4})([- /.])(0[1-9]|1[012])\2(0[1-9]|[12][0-9]|3[01])' r'(\d{4})([- /.])(0[1-9]|1[012])\2(0[1-9]|[12][0-9]|3[01])')
)
def replace_date2(match: re.Match) -> str: def replace_date2(match: re.Match) -> str:
year = match.group(1) year = match.group(1)
month = match.group(3) month = match.group(3)

View File

@ -16,7 +16,6 @@ import re
import string import string
from pypinyin.constants import SUPPORT_UCS4 from pypinyin.constants import SUPPORT_UCS4
# 全角半角转换 # 全角半角转换
# 英文字符全角 -> 半角映射表 (num: 52) # 英文字符全角 -> 半角映射表 (num: 52)
F2H_ASCII_LETTERS = { F2H_ASCII_LETTERS = {
@ -28,10 +27,7 @@ F2H_ASCII_LETTERS = {
H2F_ASCII_LETTERS = {value: key for key, value in F2H_ASCII_LETTERS.items()} H2F_ASCII_LETTERS = {value: key for key, value in F2H_ASCII_LETTERS.items()}
# 数字字符全角 -> 半角映射表 (num: 10) # 数字字符全角 -> 半角映射表 (num: 10)
F2H_DIGITS = { F2H_DIGITS = {chr(ord(char) + 65248): char for char in string.digits}
chr(ord(char) + 65248): char
for char in string.digits
}
# 数字字符半角 -> 全角映射表 # 数字字符半角 -> 全角映射表
H2F_DIGITS = {value: key for key, value in F2H_DIGITS.items()} H2F_DIGITS = {value: key for key, value in F2H_DIGITS.items()}
@ -49,8 +45,7 @@ H2F_SPACE = {' ': '\u3000'}
# 非"有拼音的汉字"的字符串可用于NSW提取 # 非"有拼音的汉字"的字符串可用于NSW提取
if SUPPORT_UCS4: if SUPPORT_UCS4:
RE_NSW = re.compile( RE_NSW = re.compile(r'(?:[^'
r'(?:[^'
r'\u3007' # r'\u3007' #
r'\u3400-\u4dbf' # CJK扩展A:[3400-4DBF] r'\u3400-\u4dbf' # CJK扩展A:[3400-4DBF]
r'\u4e00-\u9fff' # CJK基本:[4E00-9FFF] r'\u4e00-\u9fff' # CJK基本:[4E00-9FFF]
@ -59,8 +54,7 @@ if SUPPORT_UCS4:
r'\U0002A703-\U0002B73F' # CJK扩展C:[2A700-2B73F] r'\U0002A703-\U0002B73F' # CJK扩展C:[2A700-2B73F]
r'\U0002B740-\U0002B81D' # CJK扩展D:[2B740-2B81D] r'\U0002B740-\U0002B81D' # CJK扩展D:[2B740-2B81D]
r'\U0002F80A-\U0002FA1F' # CJK兼容扩展:[2F800-2FA1F] r'\U0002F80A-\U0002FA1F' # CJK兼容扩展:[2F800-2FA1F]
r'])+' r'])+')
)
else: else:
RE_NSW = re.compile( # pragma: no cover RE_NSW = re.compile( # pragma: no cover
r'(?:[^' r'(?:[^'
@ -68,5 +62,4 @@ else:
r'\u3400-\u4dbf' # CJK扩展A:[3400-4DBF] r'\u3400-\u4dbf' # CJK扩展A:[3400-4DBF]
r'\u4e00-\u9fff' # CJK基本:[4E00-9FFF] r'\u4e00-\u9fff' # CJK基本:[4E00-9FFF]
r'\uf900-\ufaff' # CJK兼容:[F900-FAFF] r'\uf900-\ufaff' # CJK兼容:[F900-FAFF]
r'])+' r'])+')
)

View File

@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
Rules to verbalize numbers into Chinese characters. Rules to verbalize numbers into Chinese characters.
https://zh.wikipedia.org/wiki/中文数字#現代中文 https://zh.wikipedia.org/wiki/中文数字#現代中文
@ -21,7 +20,6 @@ import re
from collections import OrderedDict from collections import OrderedDict
from typing import List from typing import List
DIGITS = {str(i): tran for i, tran in enumerate('零一二三四五六七八九')} DIGITS = {str(i): tran for i, tran in enumerate('零一二三四五六七八九')}
UNITS = OrderedDict({ UNITS = OrderedDict({
1: '', 1: '',
@ -33,6 +31,8 @@ UNITS = OrderedDict({
# 分数表达式 # 分数表达式
RE_FRAC = re.compile(r'(-?)(\d+)/(\d+)') RE_FRAC = re.compile(r'(-?)(\d+)/(\d+)')
def replace_frac(match: re.Match) -> str: def replace_frac(match: re.Match) -> str:
sign = match.group(1) sign = match.group(1)
nominator = match.group(2) nominator = match.group(2)
@ -46,6 +46,8 @@ def replace_frac(match: re.Match) -> str:
# 百分数表达式 # 百分数表达式
RE_PERCENTAGE = re.compile(r'(-?)(\d+(\.\d+)?)%') RE_PERCENTAGE = re.compile(r'(-?)(\d+(\.\d+)?)%')
def replace_percentage(match: re.Match) -> str: def replace_percentage(match: re.Match) -> str:
sign = match.group(1) sign = match.group(1)
percent = match.group(2) percent = match.group(2)
@ -54,28 +56,28 @@ def replace_percentage(match: re.Match) -> str:
result = f"{sign}百分之{percent}" result = f"{sign}百分之{percent}"
return result return result
# 整数表达式 # 整数表达式
# 带负号或者不带负号的整数 12, -10 # 带负号或者不带负号的整数 12, -10
RE_INTEGER = re.compile( RE_INTEGER = re.compile(r'(-?)' r'(\d+)')
r'(-?)'
r'(\d+)'
)
# 编号-无符号整形 # 编号-无符号整形
# 00078 # 00078
RE_DEFAULT_NUM = re.compile(r'\d{4}\d*') RE_DEFAULT_NUM = re.compile(r'\d{4}\d*')
def replace_default_num(match: re.Match): def replace_default_num(match: re.Match):
number = match.group(0) number = match.group(0)
return verbalize_digit(number) return verbalize_digit(number)
# 数字表达式 # 数字表达式
# 1. 整数: -10, 10; # 1. 整数: -10, 10;
# 2. 浮点数: 10.2, -0.3 # 2. 浮点数: 10.2, -0.3
# 3. 不带符号和整数部分的纯浮点数: .22, .38 # 3. 不带符号和整数部分的纯浮点数: .22, .38
RE_NUMBER = re.compile( RE_NUMBER = re.compile(r'(-?)((\d+)(\.\d+)?)' r'|(\.(\d+))')
r'(-?)((\d+)(\.\d+)?)'
r'|(\.(\d+))'
)
def replace_number(match: re.Match) -> str: def replace_number(match: re.Match) -> str:
sign = match.group(1) sign = match.group(1)
number = match.group(2) number = match.group(2)
@ -88,11 +90,12 @@ def replace_number(match: re.Match) -> str:
result = f"{sign}{number}" result = f"{sign}{number}"
return result return result
# 范围表达式 # 范围表达式
# 12-23, 12~23 # 12-23, 12~23
RE_RANGE = re.compile( RE_RANGE = re.compile(r'(\d+)[-~](\d+)')
r'(\d+)[-~](\d+)'
)
def replace_range(match: re.Match) -> str: def replace_range(match: re.Match) -> str:
first, second = match.group(1), match.group(2) first, second = match.group(1), match.group(2)
first: str = num2str(first) first: str = num2str(first)
@ -111,10 +114,13 @@ def _get_value(value_string: str, use_zero: bool=True) -> List[str]:
else: else:
return [DIGITS[stripped]] return [DIGITS[stripped]]
else: else:
largest_unit = next(power for power in reversed(UNITS.keys()) if power < len(stripped)) largest_unit = next(
power for power in reversed(UNITS.keys()) if power < len(stripped))
first_part = value_string[:-largest_unit] first_part = value_string[:-largest_unit]
second_part = value_string[-largest_unit:] second_part = value_string[-largest_unit:]
return _get_value(first_part) + [UNITS[largest_unit]] + _get_value(second_part) return _get_value(first_part) + [UNITS[largest_unit]] + _get_value(
second_part)
def verbalize_cardinal(value_string: str) -> str: def verbalize_cardinal(value_string: str) -> str:
if not value_string: if not value_string:
@ -127,10 +133,12 @@ def verbalize_cardinal(value_string: str) -> str:
result_symbols = _get_value(value_string) result_symbols = _get_value(value_string)
# verbalized number starting with '一十*' is abbreviated as `十*` # verbalized number starting with '一十*' is abbreviated as `十*`
if len(result_symbols) >= 2 and result_symbols[0] == DIGITS['1'] and result_symbols[1] == UNITS[1]: if len(result_symbols) >= 2 and result_symbols[0] == DIGITS[
'1'] and result_symbols[1] == UNITS[1]:
result_symbols = result_symbols[1:] result_symbols = result_symbols[1:]
return ''.join(result_symbols) return ''.join(result_symbols)
def verbalize_digit(value_string: str, alt_one=False) -> str: def verbalize_digit(value_string: str, alt_one=False) -> str:
result_symbols = [DIGITS[digit] for digit in value_string] result_symbols = [DIGITS[digit] for digit in value_string]
result = ''.join(result_symbols) result = ''.join(result_symbols)
@ -138,6 +146,7 @@ def verbalize_digit(value_string: str, alt_one=False) -> str:
result.replace("", "") result.replace("", "")
return result return result
def num2str(value_string: str) -> str: def num2str(value_string: str) -> str:
integer_decimal = value_string.split('.') integer_decimal = value_string.split('.')
if len(integer_decimal) == 1: if len(integer_decimal) == 1:
@ -146,7 +155,9 @@ def num2str(value_string: str) -> str:
elif len(integer_decimal) == 2: elif len(integer_decimal) == 2:
integer, decimal = integer_decimal integer, decimal = integer_decimal
else: else:
raise ValueError(f"The value string: '${value_string}' has more than one point in it.") raise ValueError(
f"The value string: '${value_string}' has more than one point in it."
)
result = verbalize_cardinal(integer) result = verbalize_cardinal(integer)

View File

@ -16,7 +16,6 @@ import re
from .num import verbalize_digit from .num import verbalize_digit
# 规范化固话/手机号码 # 规范化固话/手机号码
# 手机 # 手机
# http://www.jihaoba.com/news/show/13680 # http://www.jihaoba.com/news/show/13680
@ -33,12 +32,14 @@ def phone2str(phone_string: str, mobile=True) -> str:
if mobile: if mobile:
sp_parts = phone_string.strip('+').split() sp_parts = phone_string.strip('+').split()
result = ''.join( result = ''.join(
[verbalize_digit(part, alt_one=True) for part in sp_parts]) [verbalize_digit(
part, alt_one=True) for part in sp_parts])
return result return result
else: else:
sil_parts = phone_string.split('-') sil_parts = phone_string.split('-')
result = ''.join( result = ''.join(
[verbalize_digit(part, alt_one=True) for part in sil_parts]) [verbalize_digit(
part, alt_one=True) for part in sil_parts])
return result return result

View File

@ -16,12 +16,11 @@ import re
from .num import num2str from .num import num2str
# 温度表达式,温度会影响负号的读法 # 温度表达式,温度会影响负号的读法
# -3°C 零下三度 # -3°C 零下三度
RE_TEMPERATURE = re.compile( RE_TEMPERATURE = re.compile(r'(-?)(\d+(\.\d+)?)(°C|℃|度|摄氏度)')
r'(-?)(\d+(\.\d+)?)(°C|℃|度|摄氏度)'
)
def replace_temperature(match: re.Match) -> str: def replace_temperature(match: re.Match) -> str:
sign = match.group(1) sign = match.group(1)
temperature = match.group(2) temperature = match.group(2)

View File

@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from typing import List, Tuple
import jieba import jieba
from pypinyin import lazy_pinyin from pypinyin import lazy_pinyin
@ -20,43 +21,51 @@ from pypinyin import Style
class ToneSandhi(): class ToneSandhi():
def __init__(self): def __init__(self):
self.must_neural_tone_words = {'麻烦', '麻利', '鸳鸯', '高粱', '骨头', '骆驼', '马虎', '首饰', '馒头', '馄饨', '风筝', '难为', '队伍', self.must_neural_tone_words = {
'阔气', '闺女', '门道', '锄头', '铺盖', '铃铛', '铁匠', '钥匙', '里脊', '里头', '部分', '那么', '道士', '麻烦', '麻利', '鸳鸯', '高粱', '骨头', '骆驼', '马虎', '首饰', '馒头', '馄饨', '风筝',
'造化', '迷糊', '连累', '这么', '这个', '运气', '过去', '软和', '转悠', '踏实', '跳蚤', '跟头', '趔趄', '难为', '队伍', '阔气', '闺女', '门道', '锄头', '铺盖', '铃铛', '铁匠', '钥匙', '里脊',
'财主', '豆腐', '讲究', '记性', '记号', '认识', '规矩', '见识', '裁缝', '补丁', '衣裳', '衣服', '衙门', '里头', '部分', '那么', '道士', '造化', '迷糊', '连累', '这么', '这个', '运气', '过去',
'街坊', '行李', '行当', '蛤蟆', '蘑菇', '薄荷', '葫芦', '葡萄', '萝卜', '荸荠', '苗条', '苗头', '苍蝇', '软和', '转悠', '踏实', '跳蚤', '跟头', '趔趄', '财主', '豆腐', '讲究', '记性', '记号',
'芝麻', '舒服', '舒坦', '舌头', '自在', '膏药', '脾气', '脑袋', '脊梁', '能耐', '胳膊', '胭脂', '胡萝', '认识', '规矩', '见识', '裁缝', '补丁', '衣裳', '衣服', '衙门', '街坊', '行李', '行当',
'胡琴', '胡同', '聪明', '耽误', '耽搁', '耷拉', '耳朵', '老爷', '老实', '老婆', '老头', '老太', '翻腾', '蛤蟆', '蘑菇', '薄荷', '葫芦', '葡萄', '萝卜', '荸荠', '苗条', '苗头', '苍蝇', '芝麻',
'罗嗦', '罐头', '编辑', '结实', '红火', '累赘', '糨糊', '糊涂', '精神', '粮食', '簸箕', '篱笆', '算计', '舒服', '舒坦', '舌头', '自在', '膏药', '脾气', '脑袋', '脊梁', '能耐', '胳膊', '胭脂',
'算盘', '答应', '笤帚', '笑语', '笑话', '窟窿', '窝囊', '窗户', '稳当', '稀罕', '称呼', '秧歌', '秀气', '胡萝', '胡琴', '胡同', '聪明', '耽误', '耽搁', '耷拉', '耳朵', '老爷', '老实', '老婆',
'秀才', '福气', '祖宗', '砚台', '码头', '石榴', '石头', '石匠', '知识', '眼睛', '眯缝', '眨巴', '眉毛', '老头', '老太', '翻腾', '罗嗦', '罐头', '编辑', '结实', '红火', '累赘', '糨糊', '糊涂',
'相声', '盘算', '白净', '痢疾', '痛快', '疟疾', '疙瘩', '疏忽', '畜生', '生意', '甘蔗', '琵琶', '琢磨', '精神', '粮食', '簸箕', '篱笆', '算计', '算盘', '答应', '笤帚', '笑语', '笑话', '窟窿',
'琉璃', '玻璃', '玫瑰', '玄乎', '狐狸', '状元', '特务', '牲口', '牙碜', '牌楼', '爽快', '爱人', '热闹', '窝囊', '窗户', '稳当', '稀罕', '称呼', '秧歌', '秀气', '秀才', '福气', '祖宗', '砚台',
'烧饼', '烟筒', '烂糊', '点心', '炊帚', '灯笼', '火候', '漂亮', '滑溜', '溜达', '温和', '清楚', '消息', '码头', '石榴', '石头', '石匠', '知识', '眼睛', '眯缝', '眨巴', '眉毛', '相声', '盘算',
'浪头', '活泼', '比方', '正经', '欺负', '模糊', '槟榔', '棺材', '棒槌', '棉花', '核桃', '栅栏', '柴火', '白净', '痢疾', '痛快', '疟疾', '疙瘩', '疏忽', '畜生', '生意', '甘蔗', '琵琶', '琢磨',
'架势', '枕头', '枇杷', '机灵', '本事', '木头', '木匠', '朋友', '月饼', '月亮', '暖和', '明白', '时候', '琉璃', '玻璃', '玫瑰', '玄乎', '狐狸', '状元', '特务', '牲口', '牙碜', '牌楼', '爽快',
'新鲜', '故事', '收拾', '收成', '提防', '挖苦', '挑剔', '指甲', '指头', '拾掇', '拳头', '拨弄', '招牌', '爱人', '热闹', '烧饼', '烟筒', '烂糊', '点心', '炊帚', '灯笼', '火候', '漂亮', '滑溜',
'招呼', '抬举', '护士', '折腾', '扫帚', '打量', '打算', '打点', '打扮', '打听', '打发', '扎实', '扁担', '溜达', '温和', '清楚', '消息', '浪头', '活泼', '比方', '正经', '欺负', '模糊', '槟榔',
'戒指', '懒得', '意识', '意思', '情形', '悟性', '怪物', '思量', '怎么', '念头', '念叨', '快活', '忙活', '棺材', '棒槌', '棉花', '核桃', '栅栏', '柴火', '架势', '枕头', '枇杷', '机灵', '本事',
'志气', '心思', '得罪', '张罗', '弟兄', '开通', '应酬', '庄稼', '干事', '帮手', '帐篷', '希罕', '师父', '木头', '木匠', '朋友', '月饼', '月亮', '暖和', '明白', '时候', '新鲜', '故事', '收拾',
'师傅', '巴结', '巴掌', '差事', '工夫', '岁数', '屁股', '尾巴', '少爷', '小气', '小伙', '将就', '对头', '收成', '提防', '挖苦', '挑剔', '指甲', '指头', '拾掇', '拳头', '拨弄', '招牌', '招呼',
'对付', '寡妇', '家伙', '客气', '实在', '官司', '学问', '学生', '字号', '嫁妆', '媳妇', '媒人', '婆家', '抬举', '护士', '折腾', '扫帚', '打量', '打算', '打点', '打扮', '打听', '打发', '扎实',
'娘家', '委屈', '姑娘', '姐夫', '妯娌', '妥当', '妖精', '奴才', '女婿', '头发', '太阳', '大爷', '大方', '扁担', '戒指', '懒得', '意识', '意思', '情形', '悟性', '怪物', '思量', '怎么', '念头',
'大意', '大夫', '多少', '多么', '外甥', '壮实', '地道', '地方', '在乎', '困难', '嘴巴', '嘱咐', '嘟囔', '念叨', '快活', '忙活', '志气', '心思', '得罪', '张罗', '弟兄', '开通', '应酬', '庄稼',
'嘀咕', '喜欢', '喇嘛', '喇叭', '商量', '唾沫', '哑巴', '哈欠', '哆嗦', '咳嗽', '和尚', '告诉', '告示', '干事', '帮手', '帐篷', '希罕', '师父', '师傅', '巴结', '巴掌', '差事', '工夫', '岁数',
'含糊', '吓唬', '后头', '名字', '名堂', '合同', '吆喝', '叫唤', '口袋', '厚道', '厉害', '千斤', '包袱', '屁股', '尾巴', '少爷', '小气', '小伙', '将就', '对头', '对付', '寡妇', '家伙', '客气',
'包涵', '匀称', '勤快', '动静', '动弹', '功夫', '力气', '前头', '刺猬', '刺激', '别扭', '利落', '利索', '实在', '官司', '学问', '学生', '字号', '嫁妆', '媳妇', '媒人', '婆家', '娘家', '委屈',
'利害', '分析', '出息', '凑合', '凉快', '冷战', '冤枉', '冒失', '养活', '关系', '先生', '兄弟', '便宜', '姑娘', '姐夫', '妯娌', '妥当', '妖精', '奴才', '女婿', '头发', '太阳', '大爷', '大方',
'使唤', '佩服', '作坊', '体面', '位置', '似的', '伙计', '休息', '什么', '人家', '亲戚', '亲家', '交情', '大意', '大夫', '多少', '多么', '外甥', '壮实', '地道', '地方', '在乎', '困难', '嘴巴',
'云彩', '事情', '买卖', '主意', '丫头', '丧气', '两口', '东西', '东家', '世故', '不由', '不在', '下水', '嘱咐', '嘟囔', '嘀咕', '喜欢', '喇嘛', '喇叭', '商量', '唾沫', '哑巴', '哈欠', '哆嗦',
'下巴', '上头', '上司', '丈夫', '丈人', '一辈', '那个'} '咳嗽', '和尚', '告诉', '告示', '含糊', '吓唬', '后头', '名字', '名堂', '合同', '吆喝',
'叫唤', '口袋', '厚道', '厉害', '千斤', '包袱', '包涵', '匀称', '勤快', '动静', '动弹',
'功夫', '力气', '前头', '刺猬', '刺激', '别扭', '利落', '利索', '利害', '分析', '出息',
'凑合', '凉快', '冷战', '冤枉', '冒失', '养活', '关系', '先生', '兄弟', '便宜', '使唤',
'佩服', '作坊', '体面', '位置', '似的', '伙计', '休息', '什么', '人家', '亲戚', '亲家',
'交情', '云彩', '事情', '买卖', '主意', '丫头', '丧气', '两口', '东西', '东家', '世故',
'不由', '不在', '下水', '下巴', '上头', '上司', '丈夫', '丈人', '一辈', '那个'
}
# the meaning of jieba pos tag: https://blog.csdn.net/weixin_44174352/article/details/113731041 # the meaning of jieba pos tag: https://blog.csdn.net/weixin_44174352/article/details/113731041
# e.g. # e.g.
# word: "家里" # word: "家里"
# pos: "s" # pos: "s"
# finals: ['ia1', 'i3'] # finals: ['ia1', 'i3']
def _neural_sandhi(self, word, pos, finals): def _neural_sandhi(self, word: str, pos: str,
finals: List[str]) -> List[str]:
ge_idx = word.find("") ge_idx = word.find("")
if len(word) == 1 and word in "吧呢啊嘛" and pos == 'y': if len(word) == 1 and word in "吧呢啊嘛" and pos == 'y':
finals[-1] = finals[-1][:-1] + "5" finals[-1] = finals[-1][:-1] + "5"
@ -80,12 +89,13 @@ class ToneSandhi():
elif len(word) >= 2 and word[-1] == word[-2] and pos[0] in {"n", "v"}: elif len(word) >= 2 and word[-1] == word[-2] and pos[0] in {"n", "v"}:
finals[-1] = finals[-1][:-1] + "5" finals[-1] = finals[-1][:-1] + "5"
# conventional tone5 in Chinese # conventional tone5 in Chinese
elif word in self.must_neural_tone_words or word[-2:] in self.must_neural_tone_words: elif word in self.must_neural_tone_words or word[
-2:] in self.must_neural_tone_words:
finals[-1] = finals[-1][:-1] + "5" finals[-1] = finals[-1][:-1] + "5"
return finals return finals
def _bu_sandhi(self, word, finals): def _bu_sandhi(self, word: str, finals: List[str]) -> List[str]:
# "不" before tone4 should be bu2, e.g. 不怕 # "不" before tone4 should be bu2, e.g. 不怕
if len(word) > 1 and word[0] == "不" and finals[1][-1] == "4": if len(word) > 1 and word[0] == "不" and finals[1][-1] == "4":
finals[0] = finals[0][:-1] + "2" finals[0] = finals[0][:-1] + "2"
@ -95,9 +105,10 @@ class ToneSandhi():
return finals return finals
def _yi_sandhi(self, word, finals): def _yi_sandhi(self, word: str, finals: List[str]) -> List[str]:
# "一" in number sequences, e.g. 一零零 # "一" in number sequences, e.g. 一零零
if len(word) > 1 and word[0] == "一" and all([item.isnumeric() for item in word]): if len(word) > 1 and word[0] == "一" and all(
[item.isnumeric() for item in word]):
return finals return finals
# "一" before tone4 should be yi2, e.g. 一段 # "一" before tone4 should be yi2, e.g. 一段
elif len(word) > 1 and word[0] == "一" and finals[1][-1] == "4": elif len(word) > 1 and word[0] == "一" and finals[1][-1] == "4":
@ -113,7 +124,7 @@ class ToneSandhi():
finals[1] = finals[1][:-1] + "1" finals[1] = finals[1][:-1] + "1"
return finals return finals
def _three_sandhi(self, word, finals): def _three_sandhi(self, word: str, finals: List[str]) -> List[str]:
if len(word) == 2 and self._all_tone_three(finals): if len(word) == 2 and self._all_tone_three(finals):
finals[0] = finals[0][:-1] + "2" finals[0] = finals[0][:-1] + "2"
elif len(word) == 3: elif len(word) == 3:
@ -138,7 +149,10 @@ class ToneSandhi():
elif len(new_word_list[0]) == 1: elif len(new_word_list[0]) == 1:
finals[1] = finals[1][:-1] + "2" finals[1] = finals[1][:-1] + "2"
else: else:
finals_list = [finals[:len(new_word_list[0])], finals[len(new_word_list[0]):]] finals_list = [
finals[:len(new_word_list[0])],
finals[len(new_word_list[0]):]
]
if len(finals_list) == 2: if len(finals_list) == 2:
for i, sub in enumerate(finals_list): for i, sub in enumerate(finals_list):
# e.g. 所有/人 # e.g. 所有/人
@ -161,12 +175,12 @@ class ToneSandhi():
return finals return finals
def _all_tone_three(self, finals): def _all_tone_three(self, finals: List[str]) -> bool:
return all(x[-1] == "3" for x in finals) return all(x[-1] == "3" for x in finals)
# merge "不" and the word behind it # merge "不" and the word behind it
# if not merged, "不" sometimes appears alone in jieba's segmentation, which may cause sandhi errors # if not merged, "不" sometimes appears alone in jieba's segmentation, which may cause sandhi errors
def _merge_bu(self, seg): def _merge_bu(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
new_seg = [] new_seg = []
last_word = "" last_word = ""
for word, pos in seg: for word, pos in seg:
@ -187,15 +201,16 @@ class ToneSandhi():
# e.g. # e.g.
# input seg: [('听', 'v'), ('一', 'm'), ('听', 'v')] # input seg: [('听', 'v'), ('一', 'm'), ('听', 'v')]
# output seg: [['听一听', 'v']] # output seg: [['听一听', 'v']]
def _merge_yi(self, seg): def _merge_yi(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
new_seg = [] new_seg = []
# function 1 # function 1
for i, (word, pos) in enumerate(seg): for i, (word, pos) in enumerate(seg):
if i - 1 >= 0 and word == "一" and i + 1 < len(seg) and seg[i - 1][0] == seg[i + 1][0] and seg[i - 1][ if i - 1 >= 0 and word == "一" and i + 1 < len(seg) and seg[i - 1][
1] == "v": 0] == seg[i + 1][0] and seg[i - 1][1] == "v":
new_seg[i - 1][0] = new_seg[i - 1][0] + "一" + new_seg[i - 1][0] new_seg[i - 1][0] = new_seg[i - 1][0] + "一" + new_seg[i - 1][0]
else: else:
if i - 2 >= 0 and seg[i - 1][0] == "一" and seg[i - 2][0] == word and pos == "v": if i - 2 >= 0 and seg[i - 1][0] == "一" and seg[i - 2][
0] == word and pos == "v":
continue continue
else: else:
new_seg.append([word, pos]) new_seg.append([word, pos])
@ -210,15 +225,20 @@ class ToneSandhi():
seg = new_seg seg = new_seg
return seg return seg
def _merge_continuous_three_tones(self, seg): def _merge_continuous_three_tones(
self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
new_seg = [] new_seg = []
sub_finals_list = [lazy_pinyin(word, neutral_tone_with_five=True, style=Style.FINALS_TONE3) for (word, pos) sub_finals_list = [
in seg] lazy_pinyin(
word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)
for (word, pos) in seg
]
assert len(sub_finals_list) == len(seg) assert len(sub_finals_list) == len(seg)
merge_last = [False] * len(seg) merge_last = [False] * len(seg)
for i, (word, pos) in enumerate(seg): for i, (word, pos) in enumerate(seg):
if i - 1 >= 0 and self._all_tone_three(sub_finals_list[i - 1]) and self._all_tone_three( if i - 1 >= 0 and self._all_tone_three(sub_finals_list[
sub_finals_list[i]) and not merge_last[i - 1]: i - 1]) and self._all_tone_three(sub_finals_list[
i]) and not merge_last[i - 1]:
if len(seg[i - 1][0]) + len(seg[i][0]) <= 3: if len(seg[i - 1][0]) + len(seg[i][0]) <= 3:
new_seg[-1][0] = new_seg[-1][0] + seg[i][0] new_seg[-1][0] = new_seg[-1][0] + seg[i][0]
merge_last[i] = True merge_last[i] = True
@ -229,13 +249,15 @@ class ToneSandhi():
seg = new_seg seg = new_seg
return seg return seg
def pre_merge_for_modify(self, seg): def pre_merge_for_modify(
self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
seg = self._merge_bu(seg) seg = self._merge_bu(seg)
seg = self._merge_yi(seg) seg = self._merge_yi(seg)
seg = self._merge_continuous_three_tones(seg) seg = self._merge_continuous_three_tones(seg)
return seg return seg
def modified_tone(self, word, pos, finals): def modified_tone(self, word: str, pos: str,
finals: List[str]) -> List[str]:
finals = self._bu_sandhi(word, finals) finals = self._bu_sandhi(word, finals)
finals = self._yi_sandhi(word, finals) finals = self._yi_sandhi(word, finals)
finals = self._neural_sandhi(word, pos, finals) finals = self._neural_sandhi(word, pos, finals)

View File

@ -247,21 +247,20 @@ class FastSpeech2(nn.Layer):
speech_lengths: paddle.Tensor, speech_lengths: paddle.Tensor,
durations: paddle.Tensor, durations: paddle.Tensor,
pitch: paddle.Tensor, pitch: paddle.Tensor,
energy: paddle.Tensor, ) -> Tuple[paddle.Tensor, Dict[ energy: paddle.Tensor, ) -> Sequence[paddle.Tensor]:
str, paddle.Tensor], paddle.Tensor]:
"""Calculate forward propagation. """Calculate forward propagation.
Parameters Parameters
---------- ----------
text : LongTensor text : Tensor
Batch of padded token ids (B, Tmax). Batch of padded token ids (B, Tmax).
text_lengths : LongTensor text_lengths : Tensor
Batch of lengths of each input (B,). Batch of lengths of each input (B,).
speech : Tensor speech : Tensor
Batch of padded target features (B, Lmax, odim). Batch of padded target features (B, Lmax, odim).
speech_lengths : LongTensor speech_lengths : Tensor
Batch of the lengths of each target (B,). Batch of the lengths of each target (B,).
durations : LongTensor durations : Tensor
Batch of padded durations (B, Tmax). Batch of padded durations (B, Tmax).
pitch : Tensor pitch : Tensor
Batch of padded token-averaged pitch (B, Tmax, 1). Batch of padded token-averaged pitch (B, Tmax, 1).
@ -281,8 +280,6 @@ class FastSpeech2(nn.Layer):
energy predictor's output energy predictor's output
Tensor Tensor
speech speech
Tensor
real text_lengths
Tensor Tensor
speech_lengths, modified if reduction_factor >1 speech_lengths, modified if reduction_factor >1
""" """
@ -387,17 +384,16 @@ class FastSpeech2(nn.Layer):
pitch: paddle.Tensor=None, pitch: paddle.Tensor=None,
energy: paddle.Tensor=None, energy: paddle.Tensor=None,
alpha: float=1.0, alpha: float=1.0,
use_teacher_forcing: bool=False, ) -> Tuple[ use_teacher_forcing: bool=False, ) -> paddle.Tensor:
paddle.Tensor, paddle.Tensor, paddle.Tensor]:
"""Generate the sequence of features given the sequences of characters. """Generate the sequence of features given the sequences of characters.
Parameters Parameters
---------- ----------
text : LongTensor text : Tensor
Input sequence of characters (T,). Input sequence of characters (T,).
speech : Tensor, optional speech : Tensor, optional
Feature sequence to extract style (N, idim). Feature sequence to extract style (N, idim).
durations : LongTensor, optional durations : Tensor, optional
Groundtruth of duration (T,). Groundtruth of duration (T,).
pitch : Tensor, optional pitch : Tensor, optional
Groundtruth of token-averaged pitch (T, 1). Groundtruth of token-averaged pitch (T, 1).
@ -452,7 +448,7 @@ class FastSpeech2(nn.Layer):
Parameters Parameters
---------- ----------
ilens : LongTensor ilens : Tensor
Batch of lengths (B,). Batch of lengths (B,).
Returns Returns
@ -553,7 +549,7 @@ class FastSpeech2Loss(nn.Layer):
Batch of outputs after postnets (B, Lmax, odim). Batch of outputs after postnets (B, Lmax, odim).
before_outs : Tensor before_outs : Tensor
Batch of outputs before postnets (B, Lmax, odim). Batch of outputs before postnets (B, Lmax, odim).
d_outs : LongTensor d_outs : Tensor
Batch of outputs of duration predictor (B, Tmax). Batch of outputs of duration predictor (B, Tmax).
p_outs : Tensor p_outs : Tensor
Batch of outputs of pitch predictor (B, Tmax, 1). Batch of outputs of pitch predictor (B, Tmax, 1).
@ -561,15 +557,15 @@ class FastSpeech2Loss(nn.Layer):
Batch of outputs of energy predictor (B, Tmax, 1). Batch of outputs of energy predictor (B, Tmax, 1).
ys : Tensor ys : Tensor
Batch of target features (B, Lmax, odim). Batch of target features (B, Lmax, odim).
ds : LongTensor ds : Tensor
Batch of durations (B, Tmax). Batch of durations (B, Tmax).
ps : Tensor ps : Tensor
Batch of target token-averaged pitch (B, Tmax, 1). Batch of target token-averaged pitch (B, Tmax, 1).
es : Tensor es : Tensor
Batch of target token-averaged energy (B, Tmax, 1). Batch of target token-averaged energy (B, Tmax, 1).
ilens : LongTensor ilens : Tensor
Batch of the lengths of each input (B,). Batch of the lengths of each input (B,).
olens : LongTensor olens : Tensor
Batch of the lengths of each target (B,). Batch of the lengths of each target (B,).
Returns Returns

View File

@ -167,15 +167,15 @@ class Timeline(object):
if (k, mevent.device_id, "GPU") not in self._mem_devices: if (k, mevent.device_id, "GPU") not in self._mem_devices:
pid = self._allocate_pid() pid = self._allocate_pid()
self._mem_devices[(k, mevent.device_id, "GPU")] = pid self._mem_devices[(k, mevent.device_id, "GPU")] = pid
self._chrome_trace.emit_pid( self._chrome_trace.emit_pid("memory usage on %s:gpu:%d"
"memory usage on %s:gpu:%d" % (k, mevent.device_id), % (k, mevent.device_id),
pid) pid)
elif mevent.place == profiler_pb2.MemEvent.CPUPlace: elif mevent.place == profiler_pb2.MemEvent.CPUPlace:
if (k, mevent.device_id, "CPU") not in self._mem_devices: if (k, mevent.device_id, "CPU") not in self._mem_devices:
pid = self._allocate_pid() pid = self._allocate_pid()
self._mem_devices[(k, mevent.device_id, "CPU")] = pid self._mem_devices[(k, mevent.device_id, "CPU")] = pid
self._chrome_trace.emit_pid( self._chrome_trace.emit_pid("memory usage on %s:cpu:%d"
"memory usage on %s:cpu:%d" % (k, mevent.device_id), % (k, mevent.device_id),
pid) pid)
elif mevent.place == profiler_pb2.MemEvent.CUDAPinnedPlace: elif mevent.place == profiler_pb2.MemEvent.CUDAPinnedPlace:
if (k, mevent.device_id, "CUDAPinnedPlace" if (k, mevent.device_id, "CUDAPinnedPlace"
@ -190,8 +190,8 @@ class Timeline(object):
if (k, mevent.device_id, "NPU") not in self._mem_devices: if (k, mevent.device_id, "NPU") not in self._mem_devices:
pid = self._allocate_pid() pid = self._allocate_pid()
self._mem_devices[(k, mevent.device_id, "NPU")] = pid self._mem_devices[(k, mevent.device_id, "NPU")] = pid
self._chrome_trace.emit_pid( self._chrome_trace.emit_pid("memory usage on %s:npu:%d"
"memory usage on %s:npu:%d" % (k, mevent.device_id), % (k, mevent.device_id),
pid) pid)
if (k, 0, "CPU") not in self._mem_devices: if (k, 0, "CPU") not in self._mem_devices:
pid = self._allocate_pid() pid = self._allocate_pid()
@ -273,14 +273,14 @@ class Timeline(object):
total_size = 0 total_size = 0
while i < len(mem_list): while i < len(mem_list):
total_size += mem_list[i]['size'] total_size += mem_list[i]['size']
while i < len(mem_list) - 1 and mem_list[i]['time'] == mem_list[ while i < len(mem_list) - 1 and mem_list[i][
i + 1]['time']: 'time'] == mem_list[i + 1]['time']:
total_size += mem_list[i + 1]['size'] total_size += mem_list[i + 1]['size']
i += 1 i += 1
self._chrome_trace.emit_counter( self._chrome_trace.emit_counter(
"Memory", "Memory", mem_list[i]['pid'], mem_list[i]['time'], "Memory", "Memory", mem_list[i]['pid'],
0, total_size) mem_list[i]['time'], 0, total_size)
i += 1 i += 1
def generate_chrome_trace(self): def generate_chrome_trace(self):