Merge branch 'develop' of github.com:PaddlePaddle/Parakeet into develop
commit e3c024dd52

@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import re
+from typing import Dict
+from typing import List
 
 import numpy as np
 import paddle
@@ -35,7 +37,7 @@ class Frontend():
         for tone, id in tone_id:
             self.vocab_tones[tone] = int(id)
 
-    def _p2id(self, phonemes):
+    def _p2id(self, phonemes: List[str]) -> np.array:
         # replace unk phone with sp
         phonemes = [
             phn if phn in self.vocab_phones else "sp" for phn in phonemes
@@ -43,13 +45,14 @@ class Frontend():
         phone_ids = [self.vocab_phones[item] for item in phonemes]
         return np.array(phone_ids, np.int64)
 
-    def _t2id(self, tones):
+    def _t2id(self, tones: List[str]) -> np.array:
         # replace unk phone with sp
         tones = [tone if tone in self.vocab_tones else "0" for tone in tones]
         tone_ids = [self.vocab_tones[item] for item in tones]
         return np.array(tone_ids, np.int64)
 
-    def _get_phone_tone(self, phonemes, get_tone_ids=False):
+    def _get_phone_tone(self, phonemes: List[str],
+                        get_tone_ids: bool=False) -> List[List[str]]:
         phones = []
         tones = []
         if get_tone_ids and self.vocab_tones:
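
The fallback idiom shared by `_p2id` and `_t2id` is worth a second look: any phone missing from the vocab maps to "sp" (silence/pause) and any missing tone to "0", so the id lookup never raises. A minimal sketch with hypothetical vocab entries:

```python
# Hypothetical vocab; real ids come from the phone/tone vocab files.
vocab_phones = {"sp": 0, "n": 1, "i3": 2}

phonemes = ["n", "i3", "<unk>"]
# Unknown phones fall back to "sp" before the id lookup, so it never KeyErrors.
ids = [vocab_phones[p if p in vocab_phones else "sp"] for p in phonemes]
assert ids == [1, 2, 0]
```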
@@ -88,7 +91,11 @@ class Frontend():
             phones.append(phone)
         return phones, tones
 
-    def get_input_ids(self, sentence, merge_sentences=True, get_tone_ids=False):
+    def get_input_ids(
+            self,
+            sentence: str,
+            merge_sentences: bool=True,
+            get_tone_ids: bool=False) -> Dict[str, List[paddle.Tensor]]:
         phonemes = self.frontend.get_phonemes(
             sentence, merge_sentences=merge_sentences)
         result = {}
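
The typed signature documents the contract: one `str` in, a dict of tensor lists out. A usage sketch, assuming a Frontend built from phone/tone vocab files and assuming the result dict is keyed by names like "phone_ids"/"tone_ids" (both the constructor arguments and the key names are hypothetical here):

```python
# Hypothetical construction; the vocab file paths are placeholders.
frontend = Frontend(
    phone_vocab_path="phone_id_map.txt", tone_vocab_path="tone_id_map.txt")

out = frontend.get_input_ids("你好。", merge_sentences=True, get_tone_ids=True)
for key, tensors in out.items():
    # Each value is a List[paddle.Tensor], one entry per (merged) sentence.
    print(key, [t.shape for t in tensors])
```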

@@ -15,6 +15,6 @@ Run the command below to get the results of test.
 ```bash
 ./run.sh
 ```
-The `avg WER` of g2p is: 0.027124048652822204
+The `avg WER` of g2p is: 0.027495061517943988
 
-The `avg CER` of text normalization is: 0.0061629764893859846
+The `avg CER` of text normalization is: 0.006391234877881762
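
For reference, WER and CER are the standard edit-distance rates: (substitutions + deletions + insertions) / reference length, computed over words/phones for WER and over characters for CER. A reference sketch of the metric (not necessarily the repo's exact scorer):

```python
def edit_distance(ref, hyp):
    # Classic dynamic-programming Levenshtein distance.
    d = [[0] * (len(hyp) + 1) for _ in range(len(ref) + 1)]
    for i in range(len(ref) + 1):
        d[i][0] = i
    for j in range(len(hyp) + 1):
        d[0][j] = j
    for i in range(1, len(ref) + 1):
        for j in range(1, len(hyp) + 1):
            cost = 0 if ref[i - 1] == hyp[j - 1] else 1
            d[i][j] = min(d[i - 1][j] + 1, d[i][j - 1] + 1,
                          d[i - 1][j - 1] + cost)
    return d[-1][-1]

def error_rate(ref_tokens, hyp_tokens):
    # WER when tokens are phones/words, CER when tokens are characters.
    return edit_distance(ref_tokens, hyp_tokens) / len(ref_tokens)
```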

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import re
+from typing import List
 
 import jieba.posseg as psg
 from g2pM import G2pM
@@ -43,7 +44,7 @@ class Frontend():
"狗儿"
|
"狗儿"
|
||||||
}
|
}
|
||||||
|
|
||||||
def _get_initials_finals(self, word):
|
def _get_initials_finals(self, word: str) -> List[List[str]]:
|
||||||
initials = []
|
initials = []
|
||||||
finals = []
|
finals = []
|
||||||
if self.g2p_model == "pypinyin":
|
if self.g2p_model == "pypinyin":
|
||||||
@@ -78,7 +79,10 @@ class Frontend():
         return initials, finals
 
     # if merge_sentences, merge all sentences into one phone sequence
-    def _g2p(self, sentences, merge_sentences=True, with_erhua=True):
+    def _g2p(self,
+             sentences: List[str],
+             merge_sentences: bool=True,
+             with_erhua: bool=True) -> List[List[str]]:
         segments = sentences
         phones_list = []
         for seg in segments:
@@ -120,7 +124,11 @@ class Frontend():
             phones_list.append(merge_list)
         return phones_list
 
-    def _merge_erhua(self, initials, finals, word, pos):
+    def _merge_erhua(self,
+                     initials: List[str],
+                     finals: List[str],
+                     word: str,
+                     pos: str) -> List[List[str]]:
         if word not in self.must_erhua and (word in self.not_erhua or
                                             pos in {"a", "j", "nr"}):
             return initials, finals
@@ -137,7 +145,10 @@ class Frontend():
                 new_initials.append(initials[i])
         return new_initials, new_finals
 
-    def get_phonemes(self, sentence, merge_sentences=True, with_erhua=True):
+    def get_phonemes(self,
+                     sentence: str,
+                     merge_sentences: bool=True,
+                     with_erhua: bool=True) -> List[List[str]]:
         sentences = self.text_normalizer.normalize(sentence)
         phonemes = self._g2p(
             sentences, merge_sentences=merge_sentences, with_erhua=with_erhua)
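
With the annotations in place, the public entry point reads: one raw sentence in, a list of per-sentence phone lists out. A usage sketch (the constructor argument is an assumption, not verified against the repo):

```python
# Hypothetical construction; the real Frontend may take other vocab/model options.
frontend = Frontend(g2p_model="g2pM")

phonemes = frontend.get_phonemes("你好，世界。", merge_sentences=True)
# merge_sentences=True collapses everything into a single phone sequence,
# so the result is a List[List[str]] with one inner list.
```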

File diff suppressed because one or more lines are too long

@@ -28,7 +28,7 @@ UNITS = OrderedDict({
     8: '亿',
 })
 
-COM_QUANTIFIERS = '(匹|张|座|回|场|尾|条|个|首|阙|阵|网|炮|顶|丘|棵|只|支|袭|辆|挑|担|颗|壳|窠|曲|墙|群|腔|砣|座|客|贯|扎|捆|刀|令|打|手|罗|坡|山|岭|江|溪|钟|队|单|双|对|出|口|头|脚|板|跳|枝|件|贴|针|线|管|名|位|身|堂|课|本|页|家|户|层|丝|毫|厘|分|钱|两|斤|担|铢|石|钧|锱|忽|(千|毫|微)克|毫|厘|分|寸|尺|丈|里|寻|常|铺|程|(千|分|厘|毫|微)米|撮|勺|合|升|斗|石|盘|碗|碟|叠|桶|笼|盆|盒|杯|钟|斛|锅|簋|篮|盘|桶|罐|瓶|壶|卮|盏|箩|箱|煲|啖|袋|钵|年|月|日|季|刻|时|周|天|秒|分|旬|纪|岁|世|更|夜|春|夏|秋|冬|代|伏|辈|丸|泡|粒|颗|幢|堆|条|根|支|道|面|片|张|颗|块|元|(亿|千万|百万|万|千|百)|(亿|千万|百万|万|千|百|美|)元|(亿|千万|百万|万|千|百|)块|角|毛|分)'
+COM_QUANTIFIERS = '(朵|匹|张|座|回|场|尾|条|个|首|阙|阵|网|炮|顶|丘|棵|只|支|袭|辆|挑|担|颗|壳|窠|曲|墙|群|腔|砣|座|客|贯|扎|捆|刀|令|打|手|罗|坡|山|岭|江|溪|钟|队|单|双|对|出|口|头|脚|板|跳|枝|件|贴|针|线|管|名|位|身|堂|课|本|页|家|户|层|丝|毫|厘|分|钱|两|斤|担|铢|石|钧|锱|忽|(千|毫|微)克|毫|厘|(公)分|分|寸|尺|丈|里|寻|常|铺|程|(千|分|厘|毫|微)米|米|撮|勺|合|升|斗|石|盘|碗|碟|叠|桶|笼|盆|盒|杯|钟|斛|锅|簋|篮|盘|桶|罐|瓶|壶|卮|盏|箩|箱|煲|啖|袋|钵|年|月|日|季|刻|时|周|天|秒|分|旬|纪|岁|世|更|夜|春|夏|秋|冬|代|伏|辈|丸|泡|粒|颗|幢|堆|条|根|支|道|面|片|张|颗|块|元|(亿|千万|百万|万|千|百)|(亿|千万|百万|万|千|百|美|)元|(亿|千万|百万|万|千|百|)块|角|毛|分)'
 
 # fraction expressions
 RE_FRAC = re.compile(r'(-?)(\d+)/(\d+)')
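
The widened alternation now also covers 朵 (a classifier for flowers), 公分 (centimeter, via the optional 公 group), and bare 米 (meter) after the prefixed metric units. An abridged sketch of the effect; the excerpt pattern is assumed to behave like the full one for these cases:

```python
import re

# Abridged excerpt of the updated quantifier alternation.
QUANT = '(朵|匹|(公)分|分|(千|分|厘|毫|微)米|米|块)'

assert re.fullmatch(QUANT, '朵')    # newly added quantifier
assert re.fullmatch(QUANT, '公分')  # "(公)分" now matches centimeters
assert re.fullmatch(QUANT, '米')    # bare "米" added after the metric prefixes
```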
@@ -74,7 +74,7 @@ def replace_negative_num(match: re.Match) -> str:
 
 # serial numbers - unsigned integers
 # 00078
-RE_DEFAULT_NUM = re.compile(r'\d{4}\d*')
+RE_DEFAULT_NUM = re.compile(r'\d{3}\d*')
 
 
 def replace_default_num(match: re.Match):
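
Lowering the minimum run from four digits to three means a string like "078" is now treated as a serial number and read digit by digit too. A quick check:

```python
import re

OLD = re.compile(r'\d{4}\d*')  # previous: four or more digits
NEW = re.compile(r'\d{3}\d*')  # updated: three or more digits

assert OLD.fullmatch('078') is None  # too short for the old rule
assert NEW.fullmatch('078')          # now verbalized digit by digit
assert NEW.fullmatch('00078')        # longer runs still match
```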

@@ -14,6 +14,7 @@
 import re
 from typing import List
 
+from .char_convert import tranditional_to_simplified
 from .chronology import RE_DATE
 from .chronology import RE_DATE2
 from .chronology import RE_TIME
@@ -66,8 +67,9 @@ class TextNormalizer():
         sentences = [sentence.strip() for sentence in re.split(r'\n+', text)]
         return sentences
 
-    def normalize_sentence(self, sentence):
+    def normalize_sentence(self, sentence: str) -> str:
         # basic character conversions
+        sentence = tranditional_to_simplified(sentence)
         sentence = sentence.translate(F2H_ASCII_LETTERS).translate(
             F2H_DIGITS).translate(F2H_SPACE)
 
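
`normalize_sentence` now folds Traditional characters to Simplified before the full-width to half-width pass (the imported helper keeps its `tranditional_` spelling from the codebase). A minimal sketch of the folding step, assuming the F2H_* tables are ordinary `str.translate` maps:

```python
# Full-width ASCII digits live at U+FF10..U+FF19, offset 0xFEE0 from '0'..'9'.
F2H_DIGITS = {ord(c): ord(c) - 0xFEE0 for c in '０１２３４５６７８９'}

assert '２０２１'.translate(F2H_DIGITS) == '2021'
```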
@@ -90,7 +92,7 @@ class TextNormalizer():
 
         return sentence
 
-    def normalize(self, text):
+    def normalize(self, text: str) -> List[str]:
         sentences = self._split(text)
         sentences = [self.normalize_sentence(sent) for sent in sentences]
         return sentences
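
End-to-end, the typed `normalize` takes raw text and returns one normalized string per line. A usage sketch (assuming `TextNormalizer` needs no constructor arguments; outputs are illustrative, not verified):

```python
tn = TextNormalizer()

# Splits on newlines, then normalizes each sentence:
# traditional -> simplified, full-width -> half-width, numbers verbalized, ...
sentences = tn.normalize("他出生于１９８７年。\n體重145斤。")
assert isinstance(sentences, list)
```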

@@ -114,7 +114,6 @@ class ToneSandhi():
                 -2:] in self.must_neural_tone_words:
             finals_list[i][-1] = finals_list[i][-1][:-1] + "5"
         finals = sum(finals_list, [])
-
         return finals
 
     def _bu_sandhi(self, word: str, finals: List[str]) -> List[str]:
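
A side note on the `sum(finals_list, [])` line kept in context above: it is the flatten-a-list-of-lists idiom.

```python
# sum() with an empty-list start value concatenates the sub-lists in order.
finals_list = [["ian1", "zi5"], ["tou5"]]
assert sum(finals_list, []) == ["ian1", "zi5", "tou5"]
```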
@@ -151,11 +150,9 @@ class ToneSandhi():
             finals[i] = finals[i][:-1] + "4"
         return finals
 
-    def _split_word(self, word):
+    def _split_word(self, word: str) -> List[str]:
         word_list = jieba.cut_for_search(word)
         word_list = sorted(word_list, key=lambda i: len(i), reverse=False)
-        new_word_list = []
-
         first_subword = word_list[0]
         first_begin_idx = word.find(first_subword)
         if first_begin_idx == 0:
@ -280,7 +277,7 @@ class ToneSandhi():
|
||||||
|
 
         return new_seg
 
-    def _is_reduplication(self, word):
+    def _is_reduplication(self, word: str) -> bool:
         return len(word) == 2 and word[0] == word[1]
 
     # the last char of first word and the first char of second word is tone_three
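
The trailing comment sets up the three-tone sandhi check; for context, the classic Mandarin rule is that of two adjacent third tones, the first surfaces as second tone (你好 ni3 hao3 is spoken ni2 hao3). A toy illustration of that rule, not the class's actual code path:

```python
# Third-tone sandhi on a pair of finals carrying tone digits.
finals = ["i3", "ao3"]
if finals[0][-1] == "3" and finals[1][-1] == "3":
    finals[0] = finals[0][:-1] + "2"
assert finals == ["i2", "ao3"]
```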