Merge branch 'master' into 'master'

make g2p independent, add dataset prototype

See merge request !1
This commit is contained in:
liuyibing01 2019-11-25 21:00:15 +08:00
commit 8c36f4539c
58 changed files with 1049 additions and 102 deletions

132
.gitignore vendored Normal file

@ -0,0 +1,132 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# vscode
.vscode
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/


@ -1,2 +0,0 @@
*.pyc
*.tar.*


@ -4,17 +4,8 @@ Parakeet aims to provide a flexible, efficient and state-of-the-art text-to-spee
## Installation
### Install paddlepaddle
For faster training speed and better support, it is recommended that you install the latest develop version of paddlepaddle. Please refer to the [quick installation guide](https://paddlepaddle.org.cn/install/quick).
### Other Requirements
Install other requirements with pip.
```bash
pip install -r requirements.txt
pip install parakeet
```
## Supported models

1
examples/train.sh Normal file

@ -0,0 +1 @@
# train deepvoice 3 with ljspeech (just a placeholder for now)


@ -1 +0,0 @@
This package is adapted from https://github.com/r9y9/deepvoice3_pytorch/tree/master/deepvoice3_pytorch/frontend, Copyright (c) 2017: Ryuichi Yamamoto, whose license applies.

171
notebooks/spec_show.ipynb Normal file

File diff suppressed because one or more lines are too long

1
parakeet/__init__.py Normal file

@ -0,0 +1 @@
__version__ = "0.0.0"

3
parakeet/data/.vscode/settings.json vendored Normal file

@ -0,0 +1,3 @@
{
"python.pythonPath": "/Users/chenfeiyu/miniconda3/envs/paddle/bin/python"
}

101
parakeet/data/batch.py Normal file

@ -0,0 +1,101 @@
"""
Functions to batch lists of arrays (padding them where necessary) into a single NumPy array.
"""
import numpy as np
class TextIDBatcher(object):
"""A wrapper class for a function to build a functor, which holds the configs to pass to the function."""
def __init__(self, pad_id=0, dtype=np.int64):
self.pad_id = pad_id
self.dtype = dtype
def __call__(self, minibatch):
out = batch_text_id(minibatch, pad_id=self.pad_id, dtype=self.dtype)
return out
def batch_text_id(minibatch, pad_id=0, dtype=np.int64):
"""
minibatch: List[Example]
Example: ndarray, shape(T,), dtype: int64
"""
peek_example = minibatch[0]
assert len(peek_example.shape) == 1, "text example is a 1D tensor"
lengths = [example.shape[0] for example in minibatch] # every example has shape (T,)
max_len = np.max(lengths)
batch = []
for example in minibatch:
pad_len = max_len - example.shape[0]
batch.append(np.pad(example, [(0, pad_len)], mode='constant', constant_values=pad_id))
return np.array(batch, dtype=dtype)
class WavBatcher(object):
def __init__(self, pad_value=0., dtype=np.float32):
self.pad_value = pad_value
self.dtype = dtype
def __call__(self, minibatch):
out = batch_wav(minibatch, pad_value=self.pad_value, dtype=self.dtype)
return out
def batch_wav(minibatch, pad_value=0., dtype=np.float32):
"""
minibatch: List[Example]
Example: ndarray, shape(C, T) for multi-channel wav, shape(T,) for mono-channel wav, dtype: float32
"""
# detect data format, maybe better to specify it in __init__
peek_example = minibatch[0]
if len(peek_example.shape) == 1:
mono_channel = True
elif len(peek_example.shape) == 2:
mono_channel = False
lengths = [example.shape[-1] for example in minibatch] # assume (channel, n_samples) or (n_samples, )
max_len = np.max(lengths)
batch = []
for example in minibatch:
pad_len = max_len - example.shape[-1]
if mono_channel:
batch.append(np.pad(example, [(0, pad_len)], mode='constant', constant_values=pad_value))
else:
batch.append(np.pad(example, [(0, 0), (0, pad_len)], mode='constant', constant_values=pad_value)) # what about PCM, no
return np.array(batch, dtype=dtype)
class SpecBatcher(object):
def __init__(self, pad_value=0., dtype=np.float32):
self.pad_value = pad_value
self.dtype = dtype
def __call__(self, minibatch):
out = batch_spec(minibatch, pad_value=self.pad_value, dtype=self.dtype)
return out
def batch_spec(minibatch, pad_value=0., dtype=np.float32):
"""
minibatch: List[Example]
Example: ndarray, shape(C, F, T) for multi-channel spectrogram, shape(F, T) for mono-channel spectrogram, dtype: float32
"""
# assume (F, T) or (C, F, T)
peek_example = minibatch[0]
if len(peek_example.shape) == 2:
mono_channel = True
elif len(peek_example.shape) == 3:
mono_channel = False
lengths = [example.shape[-1] for example in minibatch] # assume (channel, F, n_frame) or (F, n_frame)
max_len = np.max(lengths)
batch = []
for example in minibatch:
pad_len = max_len - example.shape[-1]
if mono_channel:
batch.append(np.pad(example, [(0, 0), (0, pad_len)], mode='constant', constant_values=pad_value))
else:
batch.append(np.pad(example, [(0, 0), (0, 0), (0, pad_len)], mode='constant', constant_values=pad_value)) # what about PCM, no
return np.array(batch, dtype=dtype)
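The three batchers above are plain functors over lists of ragged NumPy arrays. A minimal usage sketch (the arrays and lengths below are made up for illustration):

```python
import numpy as np

from parakeet.data.batch import TextIDBatcher, WavBatcher

# two phoneme-id sequences of different lengths
texts = [np.array([2, 5, 7], dtype=np.int64),
         np.array([4, 1], dtype=np.int64)]
text_batch = TextIDBatcher(pad_id=0)(texts)  # shape (2, 3); the short row is padded with 0

# two mono waveforms of different lengths
wavs = [np.zeros(16000, dtype=np.float32),
        np.zeros(12000, dtype=np.float32)]
wav_batch = WavBatcher(pad_value=0.)(wavs)   # shape (2, 16000); the short wav is zero-padded
```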


@ -0,0 +1,71 @@
from .sampler import SequentialSampler, RandomSampler, BatchSampler
class DataCargo(object):
def __init__(self, dataset, batch_size=1, sampler=None,
shuffle=False, batch_sampler=None, drop_last=False):
self.dataset = dataset
if batch_sampler is not None:
# auto_collation with custom batch_sampler
if batch_size != 1 or shuffle or sampler is not None or drop_last:
raise ValueError('batch_sampler option is mutually exclusive '
'with batch_size, shuffle, sampler, and '
'drop_last')
batch_size = None
drop_last = False
shuffle = False
elif batch_size is None:
raise ValueError('batch_size must not be None when no batch_sampler is provided.')
elif sampler is None:
if shuffle:
sampler = RandomSampler(dataset)
else:
sampler = SequentialSampler(dataset)
# auto_collation without custom batch_sampler
batch_sampler = BatchSampler(sampler, batch_size, drop_last)
self.batch_size = batch_size
self.drop_last = drop_last
self.sampler = sampler
self.batch_sampler = batch_sampler
def __iter__(self):
return DataIterator(self)
@property
def _auto_collation(self):
# whether we do automatic batching (i.e. a batch_sampler exists)
return self.batch_sampler is not None
@property
def _index_sampler(self):
if self._auto_collation:
return self.batch_sampler
else:
return self.sampler
def __len__(self):
return len(self._index_sampler)
class DataIterator(object):
def __init__(self, loader):
self.loader = loader
self._dataset = loader.dataset
self._index_sampler = loader._index_sampler
self._sampler_iter = iter(self._index_sampler)
def __iter__(self):
return self
def __next__(self):
index = self._next_index() # may raise StopIteration, TODO(chenfeiyu): use dynamic batch size
minibatch = [self._dataset[i] for i in index] # this could also be abstracted to support dynamic batch size
minibatch = self._dataset._batch_examples(minibatch) # list[Example] -> Batch
return minibatch
def _next_index(self):
return next(self._sampler_iter)
def __len__(self):
return len(self._index_sampler)

24
parakeet/data/dataset.py Normal file

@ -0,0 +1,24 @@
class Dataset(object):
def __init__(self):
pass
def _load_metadata(self):
raise NotImplementedError
def _get_example(self):
"""return a Record (or Example, Instance according to your glossary)"""
raise NotImplementedError
def _batch_examples(self, minibatch):
"""get a list of examples, return a batch, whose structure is the same as an example"""
raise NotImplementedError
def _prepare_metadata(self):
raise NotImplementedError
def __getitem__(self, index):
raise NotImplementedError
def __iter__(self):
raise NotImplementedError
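To make the contract above concrete, here is a minimal sketch that wires a toy Dataset into DataCargo; `ToyDataset` and its contents are hypothetical and exist only to show the three methods DataCargo actually calls (`__getitem__`, `__len__`, `_batch_examples`):

```python
import numpy as np

from parakeet.data.dataset import Dataset
from parakeet.data.datacargo import DataCargo
from parakeet.data.sampler import SequentialSampler, BatchSampler


class ToyDataset(Dataset):
    """A hypothetical in-memory dataset used only to illustrate the interface."""

    def __init__(self, n=16):
        self.examples = [np.array([i], dtype=np.int64) for i in range(n)]

    def __getitem__(self, index):
        return self.examples[index]

    def __len__(self):
        return len(self.examples)

    def _batch_examples(self, minibatch):
        # every example has the same shape here, so stacking suffices
        return np.stack(minibatch)


dataset = ToyDataset()

# default path: DataCargo builds a Random/SequentialSampler plus a BatchSampler itself
cargo = DataCargo(dataset, batch_size=4, shuffle=True, drop_last=True)
for batch in cargo:
    print(batch.shape)  # (4, 1)

# custom batch_sampler path: batch_size, shuffle, sampler and drop_last must stay at defaults
batches = BatchSampler(SequentialSampler(dataset), batch_size=4, drop_last=False)
cargo = DataCargo(dataset, batch_sampler=batches)
```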

209
parakeet/data/sampler.py Normal file

@ -0,0 +1,209 @@
"""
In most cases we have a non-streaming dataset, which means we can randomly access it with __getitem__ and get its length with __len__.
This suffices for a sampler. We implement a sampler as an iterable of valid indices. By valid, we mean 0 <= index < N, where N is the length of the dataset. We then collect several indices within a batch and use them to collect examples from the dataset with __getitem__, and collate these examples to form a batch.
So the sampler is only responsible for generating valid indices.
"""
import numpy as np
import random
class Sampler(object):
def __init__(self, data_source):
pass
def __iter__(self):
# return an iterator of indices,
# or an iterator of list[int] for BatchSampler
raise NotImplementedError
class SequentialSampler(Sampler):
def __init__(self, data_source):
self.data_source = data_source
def __iter__(self):
return iter(range(len(self.data_source)))
def __len__(self):
return len(self.data_source)
class RandomSampler(Sampler):
def __init__(self, data_source, replacement=False, num_samples=None):
self.data_source = data_source
self.replacement = replacement
self._num_samples = num_samples
if not isinstance(self.replacement, bool):
raise ValueError("replacement should be a boolean value, but got "
"replacement={}".format(self.replacement))
if self._num_samples is not None and not replacement:
raise ValueError("With replacement=False, num_samples should not be specified, "
"since a random permutation will be performed.")
if not isinstance(self.num_samples, int) or self.num_samples <= 0:
raise ValueError("num_samples should be a positive integer "
"value, but got num_samples={}".format(self.num_samples))
@property
def num_samples(self):
# dataset size might change at runtime
if self._num_samples is None:
return len(self.data_source)
return self._num_samples
def __iter__(self):
n = len(self.data_source)
if self.replacement:
return iter(np.random.randint(0, n, size=(self.num_samples,), dtype=np.int64).tolist())
return iter(np.random.permutation(n).tolist())
def __len__(self):
return self.num_samples
class SubsetRandomSampler(Sampler):
r"""Samples elements randomly from a given list of indices, without replacement.
Arguments:
indices (sequence): a sequence of indices
"""
def __init__(self, indices):
self.indices = indices
def __iter__(self):
return (self.indices[i] for i in np.random.permutation(len(self.indices)))
def __len__(self):
return len(self.indices)
class PartialyRandomizedSimilarTimeLengthSampler(Sampler):
"""Partially randmoized sampler, implemented as a example sampler
1. Sort by lengths
2. Pick a small patch and randomize it
3. Permutate mini-batchs
"""
def __init__(self, lengths, batch_size=4, batch_group_size=None,
permutate=True):
_lengths = np.array(lengths, dtype=np.int64) # maybe better implement length as a sort key
self.lengths = np.sort(_lengths)
self.sorted_indices = np.argsort(_lengths)
self.batch_size = batch_size
if batch_group_size is None:
batch_group_size = min(batch_size * 32, len(self.lengths))
if batch_group_size % batch_size != 0:
batch_group_size -= batch_group_size % batch_size
self.batch_group_size = batch_group_size
assert batch_group_size % batch_size == 0
self.permutate = permutate
def __iter__(self):
indices = np.copy(self.sorted_indices)
batch_group_size = self.batch_group_size
s, e = 0, 0
for i in range(len(indices) // batch_group_size):
s = i * batch_group_size
e = s + batch_group_size
random.shuffle(indices[s: e]) # inplace
# Permutate batches
if self.permutate:
perm = np.arange(len(indices[:e]) // self.batch_size)
random.shuffle(perm)
indices[:e] = indices[:e].reshape(-1, self.batch_size)[perm, :].reshape(-1)
# Handle last elements
s += batch_group_size
#print(indices)
if s < len(indices):
random.shuffle(indices[s:])
return iter(indices)
def __len__(self):
return len(self.sorted_indices)
class WeightedRandomSampler(Sampler):
r"""Samples elements from ``[0,..,len(weights)-1]`` with given probabilities (weights).
Args:
weights (sequence) : a sequence of weights, not necessary summing up to one
num_samples (int): number of samples to draw
replacement (bool): if ``True``, samples are drawn with replacement.
If not, they are drawn without replacement, which means that when a
sample index is drawn for a row, it cannot be drawn again for that row.
Example:
>>> list(WeightedRandomSampler([0.1, 0.9, 0.4, 0.7, 3.0, 0.6], 5, replacement=True))
[0, 0, 0, 1, 0]
>>> list(WeightedRandomSampler([0.9, 0.4, 0.05, 0.2, 0.3, 0.1], 5, replacement=False))
[0, 1, 4, 3, 2]
"""
def __init__(self, weights, num_samples, replacement):
if not isinstance(num_samples, int) or num_samples <= 0:
raise ValueError("num_samples should be a positive integer "
"value, but got num_samples={}".format(num_samples))
self.weights = np.array(weights, dtype=np.float64)
self.num_samples = num_samples
self.replacement = replacement
def __iter__(self):
return iter(np.random.choice(len(self.weights), size=(self.num_samples, ),
replace=self.replacement, p=self.weights).tolist())
def __len__(self):
return self.num_samples
class BatchSampler(Sampler):
r"""Wraps another sampler to yield a mini-batch of indices.
Args:
sampler (Sampler): Base sampler.
batch_size (int): Size of mini-batch.
drop_last (bool): If ``True``, the sampler will drop the last batch if
its size would be less than ``batch_size``
Example:
>>> list(BatchSampler(SequentialSampler(range(10)), batch_size=3, drop_last=False))
[[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
>>> list(BatchSampler(SequentialSampler(range(10)), batch_size=3, drop_last=True))
[[0, 1, 2], [3, 4, 5], [6, 7, 8]]
"""
def __init__(self, sampler, batch_size, drop_last):
if not isinstance(sampler, Sampler):
raise ValueError("sampler should be an instance of "
"Sampler, but got sampler={}"
.format(sampler))
if not isinstance(batch_size, int) or batch_size <= 0:
raise ValueError("batch_size should be a positive integer value, "
"but got batch_size={}".format(batch_size))
if not isinstance(drop_last, bool):
raise ValueError("drop_last should be a boolean value, but got "
"drop_last={}".format(drop_last))
self.sampler = sampler
self.batch_size = batch_size
self.drop_last = drop_last
def __iter__(self):
batch = []
for idx in self.sampler:
batch.append(idx)
if len(batch) == self.batch_size:
yield batch
batch = []
if len(batch) > 0 and not self.drop_last:
yield batch
def __len__(self):
if self.drop_last:
return len(self.sampler) // self.batch_size
else:
return (len(self.sampler) + self.batch_size - 1) // self.batch_size
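A sketch of how the length-bucketing sampler above could feed a BatchSampler; the lengths are made up and stand for, e.g., spectrogram frame counts per utterance:

```python
import numpy as np

from parakeet.data.sampler import (
    BatchSampler, PartialyRandomizedSimilarTimeLengthSampler)

# hypothetical per-example lengths (e.g. number of frames per utterance)
lengths = np.random.randint(200, 800, size=64)

example_sampler = PartialyRandomizedSimilarTimeLengthSampler(lengths, batch_size=8)
batch_sampler = BatchSampler(example_sampler, batch_size=8, drop_last=True)

for batch_indices in batch_sampler:
    # indices within one batch point to examples of similar length,
    # which keeps padding waste small after batching
    print(batch_indices)
```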


@ -0,0 +1,22 @@
# The Design of Dataset in Parakeet
## data & metadata
A Dataset in Parakeet is basically a list of Records (or Examples, or Instances, if you prefer that glossary). By a list, we mean it can be indexed with `__getitem__` and its size can be queried with `__len__`.
This might suggest that we should load the whole dataset beforehand. In practice we do not, because of time, computation, and storage limits. Instead we load some metadata, which gives us the size of the dataset and the metadata of each record. In that sense, the metadata is itself a small dataset that helps us load a larger one. We make `_load_metadata` a method of all datasets.
In most cases the metadata is provided with the data, so loading it is trivial. In other cases we need to scan the whole dataset to build it, for example the lengths of the sentences, the vocabulary, or the statistics of the dataset. In those cases we had better save the metadata so that we do not have to generate it again and again. When implementing a dataset, we do this work in `_prepare_metadata`.
In our initial implementation, a record is a tuple, for simplicity. It could equally be a dict or a namespace.
## preprocessing & batching
One reason we load data lazily (only the metadata is loaded beforehand; the data itself is loaded only when needed) is computation overhead. For a large dataset with complicated preprocessing, preprocessing everything up front may take several days, so we preprocess lazily instead. In practice, preprocessing is implemented in `_get_example`, which is called by `__getitem__` and processes a single record.
For deep learning practice we typically batch examples, so a dataset should come with a method to batch them. Assume a record is a tuple of several items. When an item is a fixed-size array, batching it is trivial: `np.stack` suffices. For arrays with dynamic sizes, padding is needed, so we implement a batching method for each kind of item; batching a record is then composed from these methods. Every dataset should implement `_batch_examples`, but in most cases you can pick the batchers you need from `batch.py` (see the sketch below).
That is it!
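As a concrete sketch of the per-item batching described above (it mirrors what `LJSpeech._batch_examples` below does); the record layout `(mag, mel, phonemes)` is taken from the LJSpeech dataset in this merge request:

```python
from parakeet.data.batch import SpecBatcher, TextIDBatcher


def batch_records(minibatch):
    """Batch a list of (mag, mel, phonemes) records item by item.

    mag and mel are (F, T) float arrays with varying T; phonemes is a (T,) int array.
    """
    mags, mels, phonemes = zip(*minibatch)
    return (SpecBatcher(pad_value=0.)(list(mags)),
            SpecBatcher(pad_value=0.)(list(mels)),
            TextIDBatcher(pad_id=0)(list(phonemes)))
```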


@ -0,0 +1,82 @@
from pathlib import Path
import numpy as np
import pandas as pd
import librosa
from .. import g2p
from ..data.sampler import SequentialSampler, RandomSampler, BatchSampler
from ..data.dataset import Dataset
from ..data.datacargo import DataCargo
from ..data.batch import TextIDBatcher, SpecBatcher
class LJSpeech(Dataset):
def __init__(self, root):
super(LJSpeech, self).__init__()
assert isinstance(root, (str, Path)), "root should be a string or Path object"
self.root = root if isinstance(root, Path) else Path(root)
self.metadata = self._prepare_metadata()
def _prepare_metadata(self):
csv_path = self.root.joinpath("metadata.csv")
metadata = pd.read_csv(csv_path, sep="|", header=None, quoting=3,
names=["fname", "raw_text", "normalized_text"])
return metadata
def _get_example(self, metadatum):
"""All the code for generating an Example from a metadatum. If you want a
different preprocessing pipeline, you can override this method.
This method may require several processors, each of which has many options.
In that case, it is better to build a composed transform and pass it to the
__init__ method.
"""
fname, raw_text, normalized_text = metadatum
wav_path = self.root.joinpath("wavs", fname + ".wav")
# load -> trim -> preemphasis -> stft -> magnitude -> mel_scale -> logscale -> normalize
wav, sample_rate = librosa.load(wav_path, sr=None) # we would rather use functor to hold its parameters
trimed, _ = librosa.effects.trim(wav)
preemphasized = librosa.effects.preemphasis(trimed)
D = librosa.stft(preemphasized)
mag, phase = librosa.magphase(D)
mel = librosa.feature.melspectrogram(S=mag)
mag = librosa.amplitude_to_db(S=mag)
mel = librosa.amplitude_to_db(S=mel)
ref_db = 20
max_db = 100
mel = np.clip((mel - ref_db + max_db) / max_db, 1e-8, 1)
mag = np.clip((mag - ref_db + max_db) / max_db, 1e-8, 1)
phonemes = np.array(g2p.en.text_to_sequence(normalized_text), dtype=np.int64)
return (mag, mel, phonemes) # maybe we need to implement it as a map in the future
def _batch_examples(self, minibatch):
mag_batch = []
mel_batch = []
phoneme_batch = []
for example in minibatch:
mag, mel, phoneme = example
mag_batch.append(mag)
mel_batch.append(mel)
phoneme_batch.append(phoneme)
mag_batch = SpecBatcher(pad_value=0.)(mag_batch)
mel_batch = SpecBatcher(pad_value=0.)(mel_batch)
phoneme_batch = TextIDBatcher(pad_id=0)(phoneme_batch)
return (mag_batch, mel_batch, phoneme_batch)
def __getitem__(self, index):
metadatum = self.metadata.iloc[index]
example = self._get_example(metadatum)
return example
def __iter__(self):
for i in range(len(self)):
yield self[i]
def __len__(self):
return len(self.metadata)

81
parakeet/datasets/vctk.py Normal file

@ -0,0 +1,81 @@
from pathlib import Path
import pandas as pd
from ruamel.yaml import YAML
import io
import librosa
import numpy as np
from parakeet.g2p.en import text_to_sequence
from parakeet.data.dataset import Dataset
from parakeet.data.datacargo import DataCargo
from parakeet.data.batch import TextIDBatcher, WavBatcher
class VCTK(Dataset):
def __init__(self, root):
assert isinstance(root, (str, Path)), "root should be a string or Path object"
self.root = root if isinstance(root, Path) else Path(root)
self.text_root = self.root.joinpath("txt")
self.wav_root = self.root.joinpath("wav48")
if not (self.root.joinpath("metadata.csv").exists() and
self.root.joinpath("speaker_indices.yaml").exists()):
self._prepare_metadata()
self.speaker_indices, self.metadata = self._load_metadata()
def _load_metadata(self):
yaml=YAML(typ='safe')
speaker_indices = yaml.load(self.root.joinpath("speaker_indices.yaml"))
metadata = pd.read_csv(self.root.joinpath("metadata.csv"),
sep="|", quoting=3, header=1)
return speaker_indices, metadata
def _prepare_metadata(self):
metadata = []
speaker_to_index = {}
for i, speaker_folder in enumerate(self.text_root.iterdir()):
if speaker_folder.is_dir():
speaker_to_index[speaker_folder.name] = i
for text_file in speaker_folder.iterdir():
if text_file.is_file():
with io.open(str(text_file)) as f:
transcription = f.read().strip()
wav_file = text_file.with_suffix(".wav")
metadata.append((wav_file.name, speaker_folder.name, transcription))
metadata = pd.DataFrame.from_records(metadata,
columns=["wave_file", "speaker", "text"])
# save them
yaml=YAML(typ='safe')
yaml.dump(speaker_to_index, self.root.joinpath("speaker_indices.yaml"))
metadata.to_csv(self.root.joinpath("metadata.csv"),
sep="|", quoting=3, index=False)
def _get_example(self, metadatum):
wave_file, speaker, text = metadatum
wav_path = self.wav_root.joinpath(speaker, wave_file)
wav, sr = librosa.load(str(wav_path), sr=None)
phoneme_seq = np.array(text_to_sequence(text))
return wav, self.speaker_indices[speaker], phoneme_seq
def __getitem__(self, index):
metadatum = self.metadata.iloc[index]
example = self._get_example(metadatum)
return example
def __len__(self):
return len(self.metadata)
def _batch_examples(self, minibatch):
wav_batch, speaker_batch, phoneme_batch = [], [], []
for example in minibatch:
wav, speaker_id, phoneme_seq = example
wav_batch.append(wav)
speaker_batch.append(speaker_id)
phoneme_batch.append(phoneme_seq)
wav_batch = WavBatcher(pad_value=0.)(wav_batch)
speaker_batch = np.array(speaker_batch)
phoneme_batch = TextIDBatcher(pad_id=0)(phoneme_batch)
return wav_batch, speaker_batch, phoneme_batch


@ -1,4 +1,5 @@
# coding: utf-8
"""Text processing frontend
All frontend modules should have the following functions:
@ -24,8 +25,7 @@ try:
except ImportError:
ko = None
# if you are going to use the frontend, you need to modify _characters in
# symbol.py:
# if you are going to use the frontend, you need to modify _characters in symbol.py:
# _characters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!\'(),-.:;? ' + '¡¿ñáéíóúÁÉÍÓÚÑ'
try:
from . import es
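Since g2p is now a self-contained package under parakeet (the point of this merge request), a hedged usage sketch; it assumes the per-language submodules are importable as attributes of `parakeet.g2p` (as the `getattr(frontend, ...)` calls further below do) and that the NLTK data the English frontend relies on is installed:

```python
from parakeet import g2p

# English frontend: text -> integer id sequence and back
seq = g2p.en.text_to_sequence("Printing, in the only sense with which we are at present concerned.")
print(seq[:10])
print(g2p.en.sequence_to_text(seq))
```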


@ -1,5 +1,7 @@
# coding: utf-8
from modules.frontend.text.symbols import symbols
from ..text.symbols import symbols
from ..text import sequence_to_text
import nltk
from random import random
@ -27,9 +29,9 @@ def mix_pronunciation(text, p):
def text_to_sequence(text, p=0.0):
if p >= 0:
text = mix_pronunciation(text, p)
from modules.frontend.text import text_to_sequence
from ..text import text_to_sequence
text = text_to_sequence(text, ["english_cleaners"])
return text
from modules.frontend.text import sequence_to_text


@ -1,5 +1,6 @@
# coding: utf-8
from deepvoice3_paddle.frontend.text.symbols import symbols
from ..text.symbols import symbols
from ..text import sequence_to_text
import nltk
from random import random
@ -8,9 +9,9 @@ n_vocab = len(symbols)
def text_to_sequence(text, p=0.0):
from deepvoice3_paddle.frontend.text import text_to_sequence
from ..text import text_to_sequence
text = text_to_sequence(text, ["basic_cleaners"])
return text
from deepvoice3_paddle.frontend.text import sequence_to_text


@ -1,5 +1,6 @@
# coding: utf-8
import MeCab
import jaconv
from random import random
@ -29,9 +30,9 @@ def _yomi(mecab_result):
def _mix_pronunciation(tokens, yomis, p):
return "".join(yomis[idx]
if yomis[idx] is not None and random() < p else tokens[idx]
for idx in range(len(tokens)))
return "".join(
yomis[idx] if yomis[idx] is not None and random() < p else tokens[idx]
for idx in range(len(tokens)))
def mix_pronunciation(text, p):
@ -58,7 +59,8 @@ def normalize_delimitor(text):
def text_to_sequence(text, p=0.0):
for c in [" ", " ", "", "", "", "", "", "", "", "", "", "(", ")"]:
for c in [" ", " ", "", "", "", "", "", "", "",
"", "", "(", ")"]:
text = text.replace(c, "")
text = text.replace("!", "")
text = text.replace("?", "")


@ -1,5 +1,6 @@
# coding: utf-8
from random import random
n_vocab = 0xffff
@ -12,6 +13,5 @@ _tagger = None
def text_to_sequence(text, p=0.0):
return [ord(c) for c in text] + [_eos] # EOS
def sequence_to_text(seq):
return "".join(chr(n) for n in seq)


@ -2,6 +2,7 @@ import re
from . import cleaners
from .symbols import symbols
# Mappings from symbol to numeric ID and vice versa:
_symbol_to_id = {s: i for i, s in enumerate(symbols)}
_id_to_symbol = {i: s for i, s in enumerate(symbols)}


@ -1,46 +1,44 @@
'''
Cleaners are transformations that run over the input text at both training and
eval time.
Cleaners are transformations that run over the input text at both training and eval time.
Cleaners can be selected by passing a comma-delimited list of cleaner names as
the "cleaners" hyperparameter. Some cleaners are English-specific. You'll
typically want to use:
Cleaners can be selected by passing a comma-delimited list of cleaner names as the "cleaners"
hyperparameter. Some cleaners are English-specific. You'll typically want to use:
1. "english_cleaners" for English text
2. "transliteration_cleaners" for non-English text that can be transliterated
to ASCII using the Unidecode library (https://pypi.python.org/pypi/Unidecode)
3. "basic_cleaners" if you do not want to transliterate (in this case, you
should also update the symbols in symbols.py to match your data).
2. "transliteration_cleaners" for non-English text that can be transliterated to ASCII using
the Unidecode library (https://pypi.python.org/pypi/Unidecode)
3. "basic_cleaners" if you do not want to transliterate (in this case, you should also update
the symbols in symbols.py to match your data).
'''
import re
from unidecode import unidecode
from .numbers import normalize_numbers
# Regular expression matching whitespace:
_whitespace_re = re.compile(r'\s+')
# List of (regular expression, replacement) pairs for abbreviations:
_abbreviations = [(re.compile('\\b%s\\.' % x[0], re.IGNORECASE), x[1])
for x in [
('mrs', 'misess'),
('mr', 'mister'),
('dr', 'doctor'),
('st', 'saint'),
('co', 'company'),
('jr', 'junior'),
('maj', 'major'),
('gen', 'general'),
('drs', 'doctors'),
('rev', 'reverend'),
('lt', 'lieutenant'),
('hon', 'honorable'),
('sgt', 'sergeant'),
('capt', 'captain'),
('esq', 'esquire'),
('ltd', 'limited'),
('col', 'colonel'),
('ft', 'fort'),
]]
_abbreviations = [(re.compile('\\b%s\\.' % x[0], re.IGNORECASE), x[1]) for x in [
('mrs', 'misess'),
('mr', 'mister'),
('dr', 'doctor'),
('st', 'saint'),
('co', 'company'),
('jr', 'junior'),
('maj', 'major'),
('gen', 'general'),
('drs', 'doctors'),
('rev', 'reverend'),
('lt', 'lieutenant'),
('hon', 'honorable'),
('sgt', 'sergeant'),
('capt', 'captain'),
('esq', 'esquire'),
('ltd', 'limited'),
('col', 'colonel'),
('ft', 'fort'),
]]
def expand_abbreviations(text):
@ -74,10 +72,7 @@ def add_punctuation(text):
def basic_cleaners(text):
'''
Basic pipeline that lowercases and collapses whitespace without
transliteration.
'''
'''Basic pipeline that lowercases and collapses whitespace without transliteration.'''
text = lowercase(text)
text = collapse_whitespace(text)
return text
@ -92,9 +87,7 @@ def transliteration_cleaners(text):
def english_cleaners(text):
'''
Pipeline for English text, including number and abbreviation expansion.
'''
'''Pipeline for English text, including number and abbreviation expansion.'''
text = convert_to_ascii(text)
text = add_punctuation(text)
text = lowercase(text)
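A small illustration of what the English cleaner pipeline does; the module path `parakeet.g2p.text.cleaners` is assumed from the package layout in this merge request, and the expected output is approximate:

```python
from parakeet.g2p.text.cleaners import english_cleaners

print(english_cleaners("Mr. Smith bought 2 apples."))
# -> roughly "mister smith bought two apples."
```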


@ -1,24 +1,21 @@
import re
valid_symbols = [
'AA', 'AA0', 'AA1', 'AA2', 'AE', 'AE0', 'AE1', 'AE2', 'AH', 'AH0', 'AH1',
'AH2', 'AO', 'AO0', 'AO1', 'AO2', 'AW', 'AW0', 'AW1', 'AW2', 'AY', 'AY0',
'AY1', 'AY2', 'B', 'CH', 'D', 'DH', 'EH', 'EH0', 'EH1', 'EH2', 'ER', 'ER0',
'ER1', 'ER2', 'EY', 'EY0', 'EY1', 'EY2', 'F', 'G', 'HH', 'IH', 'IH0', 'IH1',
'IH2', 'IY', 'IY0', 'IY1', 'IY2', 'JH', 'K', 'L', 'M', 'N', 'NG', 'OW',
'OW0', 'OW1', 'OW2', 'OY', 'OY0', 'OY1', 'OY2', 'P', 'R', 'S', 'SH', 'T',
'TH', 'UH', 'UH0', 'UH1', 'UH2', 'UW', 'UW0', 'UW1', 'UW2', 'V', 'W', 'Y',
'Z', 'ZH'
'AA', 'AA0', 'AA1', 'AA2', 'AE', 'AE0', 'AE1', 'AE2', 'AH', 'AH0', 'AH1', 'AH2',
'AO', 'AO0', 'AO1', 'AO2', 'AW', 'AW0', 'AW1', 'AW2', 'AY', 'AY0', 'AY1', 'AY2',
'B', 'CH', 'D', 'DH', 'EH', 'EH0', 'EH1', 'EH2', 'ER', 'ER0', 'ER1', 'ER2', 'EY',
'EY0', 'EY1', 'EY2', 'F', 'G', 'HH', 'IH', 'IH0', 'IH1', 'IH2', 'IY', 'IY0', 'IY1',
'IY2', 'JH', 'K', 'L', 'M', 'N', 'NG', 'OW', 'OW0', 'OW1', 'OW2', 'OY', 'OY0',
'OY1', 'OY2', 'P', 'R', 'S', 'SH', 'T', 'TH', 'UH', 'UH0', 'UH1', 'UH2', 'UW',
'UW0', 'UW1', 'UW2', 'V', 'W', 'Y', 'Z', 'ZH'
]
_valid_symbol_set = set(valid_symbols)
class CMUDict:
'''
Thin wrapper around CMUDict data.
http://www.speech.cs.cmu.edu/cgi-bin/cmudict
'''
'''Thin wrapper around CMUDict data. http://www.speech.cs.cmu.edu/cgi-bin/cmudict'''
def __init__(self, file_or_path, keep_ambiguous=True):
if isinstance(file_or_path, str):
@ -27,10 +24,7 @@ class CMUDict:
else:
entries = _parse_cmudict(file_or_path)
if not keep_ambiguous:
entries = {
word: pron
for word, pron in entries.items() if len(pron) == 1
}
entries = {word: pron for word, pron in entries.items() if len(pron) == 1}
self._entries = entries
def __len__(self):


@ -3,6 +3,7 @@
import inflect
import re
_inflect = inflect.engine()
_comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])')
_decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)')
@ -55,8 +56,7 @@ def _expand_number(m):
elif num % 100 == 0:
return _inflect.number_to_words(num // 100) + ' hundred'
else:
return _inflect.number_to_words(
num, andword='', zero='oh', group=2).replace(', ', ' ')
return _inflect.number_to_words(num, andword='', zero='oh', group=2).replace(', ', ' ')
else:
return _inflect.number_to_words(num, andword='')


@ -1,9 +1,8 @@
'''
Defines the set of symbols used in text input to the model.
The default is a set of ASCII characters that works well for English or text
that has been run through Unidecode. For other data, you can modify _characters.
See TRAINING_DATA.md for details.
The default is a set of ASCII characters that works well for English or text that has been run
through Unidecode. For other data, you can modify _characters. See TRAINING_DATA.md for details.
'''
from .cmudict import valid_symbols

(binary image file: 447 KiB before, 447 KiB after)

@ -9,12 +9,12 @@ import argparse
import sys
import io
import numpy as np
sys.path.append("../")
# sys.path.append("../")
from hparams import hparams, hparams_debug_string
from data.data import TextDataSource, MelSpecDataSource
from data import TextDataSource, MelSpecDataSource
from nnmnkwii.datasets import FileSourceDataset
from tqdm import trange
from modules import frontend
from parakeet import g2p as frontend
def build_parser():


@ -25,7 +25,7 @@ import random
# import global hyper parameters
from hparams import hparams
from modules import frontend
from parakeet import g2p as frontend
import builder
_frontend = getattr(frontend, hparams.frontend)


@ -20,10 +20,10 @@ import paddle.fluid.dygraph as dg
import numpy as np
from modules import conv
from parakeet.modules import conv
from modules.modules import Embedding, PositionEmbedding
from modules.modules import FC, Conv1D, Conv1DGLU, Conv1DTranspose
from parakeet.modules.modules import Embedding, PositionEmbedding
from parakeet.modules.modules import FC, Conv1D, Conv1DGLU, Conv1DTranspose
ConvSpec = namedtuple("ConvSpec", ["out_channels", "filter_size", "dilation"])
WindowRange = namedtuple("WindowRange", ["backward", "ahead"])


@ -17,7 +17,7 @@ from paddle import fluid
import paddle.fluid.dygraph as dg
from hparams import hparams, hparams_debug_string
from modules import frontend
from parakeet import g2p as frontend
from deepvoice3 import DeepVoiceTTS


@ -37,7 +37,7 @@ from tensorboardX import SummaryWriter
# import global hyper parameters
from hparams import hparams
from modules import frontend
from parakeet import g2p as frontend
_frontend = getattr(frontend, hparams.frontend)


@ -28,9 +28,9 @@ import nltk
from paddle import fluid
import paddle.fluid.dygraph as dg
sys.path.append("../")
# sys.path.append("../")
import audio
from modules import frontend
from parakeet import g2p as frontend
import dry_run
from hparams import hparams


@ -23,20 +23,20 @@ from paddle import fluid
import paddle.fluid.dygraph as dg
import sys
sys.path.append("../")
# sys.path.append("../")
from argparse import ArgumentParser
from hparams import hparams, hparams_debug_string
from nnmnkwii.datasets import FileSourceDataset
from data.data import (TextDataSource, MelSpecDataSource,
from data import (TextDataSource, MelSpecDataSource,
LinearSpecDataSource,
PartialyRandomizedSimilarTimeLengthSampler,
Dataset, make_loader, create_batch)
from modules import frontend
from parakeet import g2p as frontend
from builder import deepvoice3, WindowRange
from dry_run import dry_run
from train_model import train_model
from modules.loss import TTSLoss
from parakeet.modules.loss import TTSLoss
from tensorboardX import SummaryWriter


@ -19,7 +19,7 @@ import paddle
from paddle import fluid
import paddle.fluid.dygraph as dg
from weight_norm import Conv2D, Conv2DTranspose
from .weight_norm import Conv2D, Conv2DTranspose
class Conv1D(dg.Layer):


@ -18,8 +18,8 @@ import paddle.fluid.dygraph as dg
import numpy as np
import conv
import weight_norm as weight_norm
from . import conv
from . import weight_norm
def FC(name_scope,

48
setup.py Normal file

@ -0,0 +1,48 @@
import os
import io
import re
from setuptools import setup, find_packages
def read(*names, **kwargs):
with io.open(
os.path.join(os.path.dirname(__file__), *names),
encoding=kwargs.get("encoding", "utf8")
) as fp:
return fp.read()
def find_version(*file_paths):
version_file = read(*file_paths)
version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]",
version_file, re.M)
if version_match:
return version_match.group(1)
raise RuntimeError("Unable to find version string.")
VERSION = find_version('parakeet', '__init__.py')
long_description = read('README.md')
setup_info = dict(
# Metadata
name='parakeet',
version=VERSION,
author='PaddleSL Team',
author_email='',
url='https://github.com/PaddlePaddle',
description='Speech synthesis tools and models based on Paddlepaddle',
long_description=long_description,
license='Apache 2',
install_requires=[
'numpy', 'nltk', 'inflect', 'librosa', 'unidecode', 'numba',
'tqdm', 'matplotlib', 'tensorboardX', 'tensorboard', 'scipy',
'ruamel.yaml', 'pandas', 'sox',
],
# Package info
packages=find_packages(exclude=('tests', 'tests.*')),
zip_safe=True,
)
setup(**setup_info)

10
tests/test_ljspeech.py Normal file

@ -0,0 +1,10 @@
from parakeet.datasets.ljspeech import LJSpeech
from parakeet.data.datacargo import DataCargo
from pathlib import Path
LJSPEECH_ROOT = Path("/workspace/datasets/LJSpeech-1.1")
ljspeech = LJSpeech(LJSPEECH_ROOT)
ljspeech_cargo = DataCargo(ljspeech, batch_size=16, shuffle=True)
for i, batch in enumerate(ljspeech_cargo):
print(i)

11
tests/test_vctk.py Normal file

@ -0,0 +1,11 @@
from parakeet.datasets import vctk
from pathlib import Path
from parakeet.data.datacargo import DataCargo
root = Path("/workspace/datasets/VCTK-Corpus")
vctk_dataset = vctk.VCTK(root)
vctk_cargo = DataCargo(vctk_dataset, batch_size=16, shuffle=True, drop_last=True)
for i, batch in enumerate(vctk_cargo):
print(i)