Merge branch 'develop' of github.com:yt605155624/Parakeet into add_aishell3

This commit is contained in:
TianYuan 2021-08-26 05:44:06 +00:00
commit 372208dd5b
20 changed files with 185 additions and 18 deletions

View File

@ -42,6 +42,10 @@ ignore =
# these ignores are from flake8-comprehensions; please fix! # these ignores are from flake8-comprehensions; please fix!
C400,C401,C402,C403,C404,C405,C407,C411,C413,C414,C415 C400,C401,C402,C403,C404,C405,C407,C411,C413,C414,C415
per-file-ignores =
*/__init__.py: F401
# Specify the list of error codes you wish Flake8 to report. # Specify the list of error codes you wish Flake8 to report.
select = select =
E, E,

View File

@ -53,6 +53,12 @@ cd Parakeet
pip install -e . pip install -e .
``` ```
If some Python dependencies fail to install, run the following command first
(replace `python3.6` with your own Python version):
```bash
sudo apt install -y python3.6-dev
```
See [install](https://paddle-parakeet.readthedocs.io/en/latest/install.html) for more details. See [install](https://paddle-parakeet.readthedocs.io/en/latest/install.html) for more details.
## Examples ## Examples

View File

@ -19,9 +19,9 @@ from pathlib import Path
import tqdm import tqdm
import numpy as np import numpy as np
from parakeet.audio import AudioProcessor
from parakeet.audio import LogMagnitude
from parakeet.datasets import LJSpeechMetaData from parakeet.datasets import LJSpeechMetaData
from parakeet.audio import AudioProcessor, LogMagnitude
from parakeet.frontend import English from parakeet.frontend import English
from config import get_cfg_defaults from config import get_cfg_defaults

View File

@ -13,3 +13,11 @@
# limitations under the License. # limitations under the License.
__version__ = "0.0.0" __version__ = "0.0.0"
import logging
from . import data
from . import datasets
from . import frontend
from . import models
from . import modules
from . import training
from . import utils

View File

@ -11,3 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from .audio import AudioProcessor
from .spec_normalizer import LogMagnitude
from .spec_normalizer import NormalizerBase

View File

@ -13,3 +13,6 @@
# limitations under the License. # limitations under the License.
"""Parakeet's infrastructure for data processing. """Parakeet's infrastructure for data processing.
""" """
from .dataset import *
from .batch import *

View File

@ -11,3 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from .common import *
from .ljspeech import *

View File

@ -11,3 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from .cn_normalization import *
from .generate_lexicon import *
from .normalizer import *
from .phonectic import *
from .punctuation import *
from .tone_sandhi import *
from .vocab import *

View File

@ -11,3 +11,5 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from parakeet.frontend.cn_normalization.text_normlization import *

View File

@ -33,7 +33,15 @@ RE_TIME = re.compile(r'([0-1]?[0-9]|2[0-3])'
r'(:([0-5][0-9]))?') r'(:([0-5][0-9]))?')
def replace_time(match: re.Match) -> str: def replace_time(match) -> str:
"""
Parameters
----------
match : re.Match
Returns
----------
str
"""
hour = match.group(1) hour = match.group(1)
minute = match.group(2) minute = match.group(2)
second = match.group(4) second = match.group(4)
@ -51,7 +59,15 @@ RE_DATE = re.compile(r'(\d{4}|\d{2})年'
r'(((0?[1-9])|((1|2)[0-9])|30|31)([日号]))?') r'(((0?[1-9])|((1|2)[0-9])|30|31)([日号]))?')
def replace_date(match: re.Match) -> str: def replace_date(match) -> str:
"""
Parameters
----------
match : re.Match
Returns
----------
str
"""
year = match.group(1) year = match.group(1)
month = match.group(3) month = match.group(3)
day = match.group(5) day = match.group(5)
@ -70,7 +86,15 @@ RE_DATE2 = re.compile(
r'(\d{4})([- /.])(0[1-9]|1[012])\2(0[1-9]|[12][0-9]|3[01])') r'(\d{4})([- /.])(0[1-9]|1[012])\2(0[1-9]|[12][0-9]|3[01])')
def replace_date2(match: re.Match) -> str: def replace_date2(match) -> str:
"""
Parameters
----------
match : re.Match
Returns
----------
str
"""
year = match.group(1) year = match.group(1)
month = match.group(3) month = match.group(3)
day = match.group(4) day = match.group(4)

View File

@ -34,7 +34,15 @@ COM_QUANTIFIERS = '(朵|匹|张|座|回|场|尾|条|个|首|阙|阵|网|炮|顶|
RE_FRAC = re.compile(r'(-?)(\d+)/(\d+)') RE_FRAC = re.compile(r'(-?)(\d+)/(\d+)')
def replace_frac(match: re.Match) -> str: def replace_frac(match) -> str:
"""
Parameters
----------
match : re.Match
Returns
----------
str
"""
sign = match.group(1) sign = match.group(1)
nominator = match.group(2) nominator = match.group(2)
denominator = match.group(3) denominator = match.group(3)
@ -49,7 +57,15 @@ def replace_frac(match: re.Match) -> str:
RE_PERCENTAGE = re.compile(r'(-?)(\d+(\.\d+)?)%') RE_PERCENTAGE = re.compile(r'(-?)(\d+(\.\d+)?)%')
def replace_percentage(match: re.Match) -> str: def replace_percentage(match) -> str:
"""
Parameters
----------
match : re.Match
Returns
----------
str
"""
sign = match.group(1) sign = match.group(1)
percent = match.group(2) percent = match.group(2)
sign: str = "" if sign else "" sign: str = "" if sign else ""
@ -63,7 +79,15 @@ def replace_percentage(match: re.Match) -> str:
RE_INTEGER = re.compile(r'(-)' r'(\d+)') RE_INTEGER = re.compile(r'(-)' r'(\d+)')
def replace_negative_num(match: re.Match) -> str: def replace_negative_num(match) -> str:
"""
Parameters
----------
match : re.Match
Returns
----------
str
"""
sign = match.group(1) sign = match.group(1)
number = match.group(2) number = match.group(2)
sign: str = "" if sign else "" sign: str = "" if sign else ""
@ -77,7 +101,15 @@ def replace_negative_num(match: re.Match) -> str:
RE_DEFAULT_NUM = re.compile(r'\d{3}\d*') RE_DEFAULT_NUM = re.compile(r'\d{3}\d*')
def replace_default_num(match: re.Match): def replace_default_num(match):
"""
Parameters
----------
match : re.Match
Returns
----------
str
"""
number = match.group(0) number = match.group(0)
return verbalize_digit(number) return verbalize_digit(number)
@ -90,7 +122,15 @@ RE_POSITIVE_QUANTIFIERS = re.compile(r"(\d+)([多余几])?" + COM_QUANTIFIERS)
RE_NUMBER = re.compile(r'(-?)((\d+)(\.\d+)?)' r'|(\.(\d+))') RE_NUMBER = re.compile(r'(-?)((\d+)(\.\d+)?)' r'|(\.(\d+))')
def replace_positive_quantifier(match: re.Match) -> str: def replace_positive_quantifier(match) -> str:
"""
Parameters
----------
match : re.Match
Returns
----------
str
"""
number = match.group(1) number = match.group(1)
match_2 = match.group(2) match_2 = match.group(2)
match_2: str = match_2 if match_2 else "" match_2: str = match_2 if match_2 else ""
@ -100,7 +140,15 @@ def replace_positive_quantifier(match: re.Match) -> str:
return result return result
def replace_number(match: re.Match) -> str: def replace_number(match) -> str:
"""
Parameters
----------
match : re.Match
Returns
----------
str
"""
sign = match.group(1) sign = match.group(1)
number = match.group(2) number = match.group(2)
pure_decimal = match.group(5) pure_decimal = match.group(5)
@ -118,7 +166,15 @@ def replace_number(match: re.Match) -> str:
RE_RANGE = re.compile(r'(\d+)[-~](\d+)') RE_RANGE = re.compile(r'(\d+)[-~](\d+)')
def replace_range(match: re.Match) -> str: def replace_range(match) -> str:
"""
Parameters
----------
match : re.Match
Returns
----------
str
"""
first, second = match.group(1), match.group(2) first, second = match.group(1), match.group(2)
first: str = num2str(first) first: str = num2str(first)
second: str = num2str(second) second: str = num2str(second)

View File

@ -40,9 +40,25 @@ def phone2str(phone_string: str, mobile=True) -> str:
return result return result
def replace_phone(match: re.Match) -> str: def replace_phone(match) -> str:
"""
Parameters
----------
match : re.Match
Returns
----------
str
"""
return phone2str(match.group(0), mobile=False) return phone2str(match.group(0), mobile=False)
def replace_mobile(match: re.Match) -> str: def replace_mobile(match) -> str:
"""
Parameters
----------
match : re.Match
Returns
----------
str
"""
return phone2str(match.group(0)) return phone2str(match.group(0))

View File

@ -20,7 +20,15 @@ from .num import num2str
RE_TEMPERATURE = re.compile(r'(-?)(\d+(\.\d+)?)(°C|℃|度|摄氏度)') RE_TEMPERATURE = re.compile(r'(-?)(\d+(\.\d+)?)(°C|℃|度|摄氏度)')
def replace_temperature(match: re.Match) -> str: def replace_temperature(match) -> str:
"""
Parameters
----------
match : re.Match
Returns
----------
str
"""
sign = match.group(1) sign = match.group(1)
temperature = match.group(2) temperature = match.group(2)
unit = match.group(3) unit = match.group(3)

View File

@ -11,3 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from parakeet.frontend.normalizer.normalizer import *
from parakeet.frontend.normalizer.numbers import *

View File

@ -17,7 +17,7 @@ from abc import abstractmethod
from g2p_en import G2p from g2p_en import G2p
from g2pM import G2pM from g2pM import G2pM
from parakeet.frontend import Vocab from parakeet.frontend.vocab import Vocab
from parakeet.frontend.normalizer.normalizer import normalize from parakeet.frontend.normalizer.normalizer import normalize
from parakeet.frontend.punctuation import get_punctuations from parakeet.frontend.punctuation import get_punctuations

View File

@ -11,3 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from .fastspeech2 import *
from .tacotron2 import *
from .transformer_tts import *
from .waveflow import *

View File

@ -11,3 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from .attention import *
from .conv import *
from .geometry import *
from .losses import *
from .masking import *
from .positional_encoding import *
from .transformer import *

View File

@ -11,3 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from .cli import *
from .experiment import *

View File

@ -14,13 +14,13 @@
from parakeet.training.triggers.interval_trigger import IntervalTrigger from parakeet.training.triggers.interval_trigger import IntervalTrigger
def never_file_trigger(trainer): def never_fail_trigger(trainer):
return False return False
def get_trigger(trigger): def get_trigger(trigger):
if trigger is None: if trigger is None:
return never_file_trigger return never_fail_trigger
if callable(trigger): if callable(trigger):
return trigger return trigger
else: else:

View File

@ -11,3 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from . import checkpoint
from . import display
from . import layer_tools
from . import mp_tools
from . import scheduler