Merge branch 'master' into 'master'

make g2p independent, add dataset prototype

See merge request !1

This commit is contained in: commit 8c36f4539c
@@ -0,0 +1,132 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# vscode
.vscode

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
@@ -1,2 +0,0 @@
*.pyc
*.tar.*
README.md
@@ -4,17 +4,8 @@ Parakeet aims to provide a flexible, efficient and state-of-the-art text-to-speech

## Installation

### Install paddlepaddle

For faster training speed and better support, it is recommended that you install the latest develop version of paddlepaddle. Please refer to the [quick installation guide](https://paddlepaddle.org.cn/install/quick).

### Other Requirements

Install other requirements with pip.

```bash
pip install -r requirements.txt
pip install parakeet
```

## Supported models
@@ -0,0 +1 @@
# train deepvoice 3 with ljspeech (just a placeholder now)

@@ -1 +0,0 @@
This package is adapted from https://github.com/r9y9/deepvoice3_pytorch/tree/master/deepvoice3_pytorch/frontend, Copyright (c) 2017: Ryuichi Yamamoto, whose license applies.

File diff suppressed because one or more lines are too long
@@ -0,0 +1 @@
__version__ = "0.0.0"

@@ -0,0 +1,3 @@
{
    "python.pythonPath": "/Users/chenfeiyu/miniconda3/envs/paddle/bin/python"
}
@@ -0,0 +1,101 @@
"""
Functions to batch arrays that satisfy certain conditions.
"""
import numpy as np


class TextIDBatcher(object):
    """A wrapper class for a function to build a functor, which holds the configs to pass to the function."""

    def __init__(self, pad_id=0, dtype=np.int64):
        self.pad_id = pad_id
        self.dtype = dtype

    def __call__(self, minibatch):
        out = batch_text_id(minibatch, pad_id=self.pad_id, dtype=self.dtype)
        return out


def batch_text_id(minibatch, pad_id=0, dtype=np.int64):
    """
    minibatch: List[Example]
    Example: ndarray, shape(T,), dtype: int64
    """
    peek_example = minibatch[0]
    assert len(peek_example.shape) == 1, "text example is a 1D tensor"

    lengths = [example.shape[0] for example in minibatch]
    max_len = np.max(lengths)

    batch = []
    for example in minibatch:
        pad_len = max_len - example.shape[0]
        batch.append(np.pad(example, [(0, pad_len)], mode='constant', constant_values=pad_id))

    return np.array(batch, dtype=dtype)


class WavBatcher(object):
    def __init__(self, pad_value=0., dtype=np.float32):
        self.pad_value = pad_value
        self.dtype = dtype

    def __call__(self, minibatch):
        out = batch_wav(minibatch, pad_value=self.pad_value, dtype=self.dtype)
        return out


def batch_wav(minibatch, pad_value=0., dtype=np.float32):
    """
    minibatch: List[Example]
    Example: ndarray, shape(C, T) for multi-channel wav, shape(T,) for mono-channel wav, dtype: float32
    """
    # detect data format, maybe better to specify it in __init__
    peek_example = minibatch[0]
    if len(peek_example.shape) == 1:
        mono_channel = True
    elif len(peek_example.shape) == 2:
        mono_channel = False

    lengths = [example.shape[-1] for example in minibatch]  # assume (channel, n_samples) or (n_samples, )
    max_len = np.max(lengths)

    batch = []
    for example in minibatch:
        pad_len = max_len - example.shape[-1]
        if mono_channel:
            batch.append(np.pad(example, [(0, pad_len)], mode='constant', constant_values=pad_value))
        else:
            batch.append(np.pad(example, [(0, 0), (0, pad_len)], mode='constant', constant_values=pad_value))  # what about PCM, no

    return np.array(batch, dtype=dtype)


class SpecBatcher(object):
    def __init__(self, pad_value=0., dtype=np.float32):
        self.pad_value = pad_value
        self.dtype = dtype

    def __call__(self, minibatch):
        out = batch_spec(minibatch, pad_value=self.pad_value, dtype=self.dtype)
        return out


def batch_spec(minibatch, pad_value=0., dtype=np.float32):
    """
    minibatch: List[Example]
    Example: ndarray, shape(C, F, T) for multi-channel spectrogram, shape(F, T) for mono-channel spectrogram, dtype: float32
    """
    # assume (F, T) or (C, F, T)
    peek_example = minibatch[0]
    if len(peek_example.shape) == 2:
        mono_channel = True
    elif len(peek_example.shape) == 3:
        mono_channel = False

    lengths = [example.shape[-1] for example in minibatch]  # assume (channel, F, n_frame) or (F, n_frame)
    max_len = np.max(lengths)

    batch = []
    for example in minibatch:
        pad_len = max_len - example.shape[-1]
        if mono_channel:
            batch.append(np.pad(example, [(0, 0), (0, pad_len)], mode='constant', constant_values=pad_value))
        else:
            batch.append(np.pad(example, [(0, 0), (0, 0), (0, pad_len)], mode='constant', constant_values=pad_value))  # what about PCM, no

    return np.array(batch, dtype=dtype)
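To make the padding behaviour of these batchers concrete, here is a minimal usage sketch. The toy arrays are made up for illustration; the import path `parakeet.data.batch` and the batcher classes are the ones added in this merge request.

```python
import numpy as np
from parakeet.data.batch import TextIDBatcher, WavBatcher

# two ragged "sentences" of phoneme ids
texts = [np.array([3, 7, 2], dtype=np.int64), np.array([5, 1], dtype=np.int64)]
text_batch = TextIDBatcher(pad_id=0)(texts)
print(text_batch.shape)  # (2, 3); the shorter example is right-padded with pad_id
print(text_batch[1])     # [5 1 0]

# two mono waveforms of different lengths
wavs = [np.random.randn(16000).astype(np.float32),
        np.random.randn(12000).astype(np.float32)]
wav_batch = WavBatcher(pad_value=0.)(wavs)
print(wav_batch.shape)   # (2, 16000); trailing zeros pad the shorter clip
```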
@@ -0,0 +1,71 @@
from .sampler import SequentialSampler, RandomSampler, BatchSampler


class DataCargo(object):
    def __init__(self, dataset, batch_size=1, sampler=None,
                 shuffle=False, batch_sampler=None, drop_last=False):
        self.dataset = dataset

        if batch_sampler is not None:
            # auto_collation with custom batch_sampler
            if batch_size != 1 or shuffle or sampler is not None or drop_last:
                raise ValueError('batch_sampler option is mutually exclusive '
                                 'with batch_size, shuffle, sampler, and '
                                 'drop_last')
            batch_size = None
            drop_last = False
            shuffle = False
        elif batch_size is None:
            raise ValueError('batch_size should not be None when no batch_sampler is given.')
        elif sampler is None:
            if shuffle:
                sampler = RandomSampler(dataset)
            else:
                sampler = SequentialSampler(dataset)
            # auto_collation without custom batch_sampler
            batch_sampler = BatchSampler(sampler, batch_size, drop_last)

        self.batch_size = batch_size
        self.drop_last = drop_last
        self.sampler = sampler
        self.batch_sampler = batch_sampler

    def __iter__(self):
        return DataIterator(self)

    @property
    def _auto_collation(self):
        # whether we do auto batching
        return self.batch_sampler is not None

    @property
    def _index_sampler(self):
        if self._auto_collation:
            return self.batch_sampler
        else:
            return self.sampler

    def __len__(self):
        return len(self._index_sampler)


class DataIterator(object):
    def __init__(self, loader):
        self.loader = loader
        self._dataset = loader.dataset

        self._index_sampler = loader._index_sampler
        self._sampler_iter = iter(self._index_sampler)

    def __iter__(self):
        return self

    def __next__(self):
        index = self._next_index()  # may raise StopIteration, TODO(chenfeiyu): use dynamic batch size
        minibatch = [self._dataset[i] for i in index]  # we could abstract this too, to support dynamic batch size
        minibatch = self._dataset._batch_examples(minibatch)  # list[Example] -> Batch
        return minibatch

    def _next_index(self):
        return next(self._sampler_iter)

    def __len__(self):
        return len(self._index_sampler)
@@ -0,0 +1,24 @@
class Dataset(object):
    def __init__(self):
        pass

    def _load_metadata(self):
        raise NotImplementedError

    def _get_example(self):
        """Return a Record (or Example, or Instance, according to your glossary)."""
        raise NotImplementedError

    def _batch_examples(self, minibatch):
        """Take a list of examples and return a batch whose structure is the same as an example's."""
        raise NotImplementedError

    def _prepare_metadata(self):
        raise NotImplementedError

    def __getitem__(self, index):
        raise NotImplementedError

    def __iter__(self):
        raise NotImplementedError
@@ -0,0 +1,209 @@
"""
In most cases we have a non-streaming dataset, which means we can randomly access it with __getitem__ and get its length with __len__.

This suffices for a sampler. We implement a sampler as an iterable of valid indices. By valid, we mean 0 <= index < N, where N is the length of the dataset. We then collect several indices within a batch and use them to collect examples from the dataset with __getitem__. These examples are then collated to form a batch.

So the sampler is only responsible for generating valid indices.
"""


import numpy as np
import random


class Sampler(object):
    def __init__(self, data_source):
        pass

    def __iter__(self):
        # return an iterator of indices,
        # or an iterator of list[int] for BatchSampler
        raise NotImplementedError


class SequentialSampler(Sampler):
    def __init__(self, data_source):
        self.data_source = data_source

    def __iter__(self):
        return iter(range(len(self.data_source)))

    def __len__(self):
        return len(self.data_source)


class RandomSampler(Sampler):
    def __init__(self, data_source, replacement=False, num_samples=None):
        self.data_source = data_source
        self.replacement = replacement
        self._num_samples = num_samples

        if not isinstance(self.replacement, bool):
            raise ValueError("replacement should be a boolean value, but got "
                             "replacement={}".format(self.replacement))

        if self._num_samples is not None and not replacement:
            raise ValueError("With replacement=False, num_samples should not be specified, "
                             "since a random permutation will be performed.")

        if not isinstance(self.num_samples, int) or self.num_samples <= 0:
            raise ValueError("num_samples should be a positive integer "
                             "value, but got num_samples={}".format(self.num_samples))

    @property
    def num_samples(self):
        # dataset size might change at runtime
        if self._num_samples is None:
            return len(self.data_source)
        return self._num_samples

    def __iter__(self):
        n = len(self.data_source)
        if self.replacement:
            return iter(np.random.randint(0, n, size=(self.num_samples,), dtype=np.int64).tolist())
        return iter(np.random.permutation(n).tolist())

    def __len__(self):
        return self.num_samples


class SubsetRandomSampler(Sampler):
    r"""Samples elements randomly from a given list of indices, without replacement.

    Arguments:
        indices (sequence): a sequence of indices
    """

    def __init__(self, indices):
        self.indices = indices

    def __iter__(self):
        return (self.indices[i] for i in np.random.permutation(len(self.indices)))

    def __len__(self):
        return len(self.indices)


class PartialyRandomizedSimilarTimeLengthSampler(Sampler):
    """Partially randomized sampler, implemented as an example sampler:
    1. Sort the examples by length.
    2. Pick a small patch and randomize it.
    3. Permute the mini-batches.
    """

    def __init__(self, lengths, batch_size=4, batch_group_size=None,
                 permutate=True):
        _lengths = np.array(lengths, dtype=np.int64)  # maybe better to implement length as a sort key
        self.lengths = np.sort(_lengths)
        self.sorted_indices = np.argsort(_lengths)

        self.batch_size = batch_size
        if batch_group_size is None:
            batch_group_size = min(batch_size * 32, len(self.lengths))
            if batch_group_size % batch_size != 0:
                batch_group_size -= batch_group_size % batch_size

        self.batch_group_size = batch_group_size
        assert batch_group_size % batch_size == 0
        self.permutate = permutate

    def __iter__(self):
        indices = np.copy(self.sorted_indices)
        batch_group_size = self.batch_group_size
        s, e = 0, 0
        for i in range(len(indices) // batch_group_size):
            s = i * batch_group_size
            e = s + batch_group_size
            random.shuffle(indices[s: e])  # inplace

        # Permute batches
        if self.permutate:
            perm = np.arange(len(indices[:e]) // self.batch_size)
            random.shuffle(perm)
            indices[:e] = indices[:e].reshape(-1, self.batch_size)[perm, :].reshape(-1)

        # Handle last elements
        s += batch_group_size
        if s < len(indices):
            random.shuffle(indices[s:])

        return iter(indices)

    def __len__(self):
        return len(self.sorted_indices)


class WeightedRandomSampler(Sampler):
    r"""Samples elements from ``[0,..,len(weights)-1]`` with given probabilities (weights).

    Args:
        weights (sequence) : a sequence of weights, not necessarily summing up to one
        num_samples (int): number of samples to draw
        replacement (bool): if ``True``, samples are drawn with replacement.
            If not, they are drawn without replacement, which means that when a
            sample index is drawn for a row, it cannot be drawn again for that row.

    Example:
        >>> list(WeightedRandomSampler([0.1, 0.9, 0.4, 0.7, 3.0, 0.6], 5, replacement=True))
        [0, 0, 0, 1, 0]
        >>> list(WeightedRandomSampler([0.9, 0.4, 0.05, 0.2, 0.3, 0.1], 5, replacement=False))
        [0, 1, 4, 3, 2]
    """

    def __init__(self, weights, num_samples, replacement):
        if not isinstance(num_samples, int) or num_samples <= 0:
            raise ValueError("num_samples should be a positive integer "
                             "value, but got num_samples={}".format(num_samples))
        self.weights = np.array(weights, dtype=np.float64)
        self.num_samples = num_samples
        self.replacement = replacement

    def __iter__(self):
        p = self.weights / np.sum(self.weights)  # np.random.choice requires normalized probabilities
        return iter(np.random.choice(len(self.weights), size=(self.num_samples, ),
                                     replace=self.replacement, p=p).tolist())

    def __len__(self):
        return self.num_samples


class BatchSampler(Sampler):
    r"""Wraps another sampler to yield a mini-batch of indices.

    Args:
        sampler (Sampler): Base sampler.
        batch_size (int): Size of mini-batch.
        drop_last (bool): If ``True``, the sampler will drop the last batch if
            its size would be less than ``batch_size``.

    Example:
        >>> list(BatchSampler(SequentialSampler(range(10)), batch_size=3, drop_last=False))
        [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
        >>> list(BatchSampler(SequentialSampler(range(10)), batch_size=3, drop_last=True))
        [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
    """

    def __init__(self, sampler, batch_size, drop_last):
        if not isinstance(sampler, Sampler):
            raise ValueError("sampler should be an instance of "
                             "Sampler, but got sampler={}"
                             .format(sampler))
        if not isinstance(batch_size, int) or batch_size <= 0:
            raise ValueError("batch_size should be a positive integer value, "
                             "but got batch_size={}".format(batch_size))
        if not isinstance(drop_last, bool):
            raise ValueError("drop_last should be a boolean value, but got "
                             "drop_last={}".format(drop_last))
        self.sampler = sampler
        self.batch_size = batch_size
        self.drop_last = drop_last

    def __iter__(self):
        batch = []
        for idx in self.sampler:
            batch.append(idx)
            if len(batch) == self.batch_size:
                yield batch
                batch = []
        if len(batch) > 0 and not self.drop_last:
            yield batch

    def __len__(self):
        if self.drop_last:
            return len(self.sampler) // self.batch_size
        else:
            return (len(self.sampler) + self.batch_size - 1) // self.batch_size
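As a quick illustration of the sampler contract described in the module docstring (an iterable of valid indices, optionally grouped into index lists by `BatchSampler`), here is a small sketch. The length list is made up; the classes are the ones above, and the module path `parakeet.data.sampler` is inferred from the other `parakeet.data` imports in this merge request.

```python
from parakeet.data.sampler import (SequentialSampler, BatchSampler,
                                   PartialyRandomizedSimilarTimeLengthSampler)

# a plain index stream grouped into mini-batches of 3
print(list(BatchSampler(SequentialSampler(range(7)), batch_size=3, drop_last=False)))
# [[0, 1, 2], [3, 4, 5], [6]]

# length-aware sampling: indices of similarly long examples end up near each other
lengths = [120, 30, 500, 45, 250, 90, 310, 60]  # e.g. text lengths of 8 examples
sampler = PartialyRandomizedSimilarTimeLengthSampler(lengths, batch_size=2)
print(list(sampler))  # a permutation of 0..7, shuffled locally within sorted groups
```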
@@ -0,0 +1,22 @@
# The Design of Dataset in Parakeet

## data & metadata
A Dataset in Parakeet is basically a list of Records (or examples, or instances, if you prefer that glossary). By being a list, we mean it can be indexed with `__getitem__`, and we can get the size of the dataset with `__len__`.

This might suggest that we should load the whole dataset beforehand. In practice we do not, because of time, computation and storage limits. Instead, we load some metadata, which gives us the size of the dataset and the metadata of each record. In this sense, the metadata itself is a small dataset that helps us load a larger one. We make `_load_metadata` a method of all datasets.

In most cases, metadata is provided with the data, so we can load it trivially. In other cases, we need to scan the whole dataset to produce it, for example the lengths of the sentences, the vocabulary, or other statistics of the dataset. In those cases, we'd better save the metadata so that we do not need to generate it again and again. When implementing a dataset, we do this work in `_prepare_metadata`.

In our initial datasets, a record is implemented as a tuple for simplicity. It could also be implemented as a dict or a namespace.

## preprocessing & batching
One of the reasons we choose to load data lazily (only load metadata beforehand, and load data only when needed) is computation overhead. For a large dataset with complicated preprocessing, preprocessing everything up front may take several days, so we preprocess lazily. In practice, preprocessing is implemented in `_get_example`, which is called by `__getitem__`; it preprocesses only one record.

For deep learning practice, we typically batch examples, so the dataset should come with a method to batch examples. Assume the record is implemented as a tuple with several items. When an item is a fixed-size array, batching is trivial: `np.stack` suffices. For arrays with dynamic size, padding is needed. We decided to implement a batching method for each item; batching a record can then be composed from these methods. Each dataset should implement `_batch_examples`, but in most cases you can choose the batchers from `batch.py`. A minimal sketch of a dataset following this design is given after this section.

That's it!
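The sketch below illustrates the design described above with a toy dataset. The metadata file name `toy_metadata.csv` and its columns are invented for illustration; the `Dataset`/`DataCargo` interfaces and the batchers are the ones added in this commit.

```python
import numpy as np
import pandas as pd

from parakeet.data.dataset import Dataset
from parakeet.data.datacargo import DataCargo
from parakeet.data.batch import TextIDBatcher, WavBatcher


class ToySpeech(Dataset):
    """A toy dataset whose metadata CSV has columns (fname, text_ids, n_samples)."""

    def __init__(self, metadata_path):
        super(ToySpeech, self).__init__()
        self.metadata_path = metadata_path
        self.metadata = self._load_metadata()

    def _load_metadata(self):
        # metadata is small, so it is loaded eagerly; audio would be loaded lazily per example
        return pd.read_csv(self.metadata_path)

    def _get_example(self, metadatum):
        # lazy "preprocessing": fabricate a waveform of the recorded length
        text_ids = np.array([int(c) for c in str(metadatum["text_ids"])], dtype=np.int64)
        wav = np.zeros(int(metadatum["n_samples"]), dtype=np.float32)
        return wav, text_ids

    def _batch_examples(self, minibatch):
        wavs, texts = zip(*minibatch)
        return WavBatcher(pad_value=0.)(list(wavs)), TextIDBatcher(pad_id=0)(list(texts))

    def __getitem__(self, index):
        return self._get_example(self.metadata.iloc[index])

    def __len__(self):
        return len(self.metadata)


# cargo = DataCargo(ToySpeech("toy_metadata.csv"), batch_size=8, shuffle=True)
# for wav_batch, text_batch in cargo:
#     pass  # each item is padded to the longest example in its batch
```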
@@ -0,0 +1,82 @@
from pathlib import Path
import numpy as np
import pandas as pd
import librosa
from .. import g2p

from ..data.sampler import SequentialSampler, RandomSampler, BatchSampler
from ..data.dataset import Dataset
from ..data.datacargo import DataCargo
from ..data.batch import TextIDBatcher, SpecBatcher


class LJSpeech(Dataset):
    def __init__(self, root):
        super(LJSpeech, self).__init__()
        assert isinstance(root, (str, Path)), "root should be a string or Path object"
        self.root = root if isinstance(root, Path) else Path(root)
        self.metadata = self._prepare_metadata()

    def _prepare_metadata(self):
        csv_path = self.root.joinpath("metadata.csv")
        metadata = pd.read_csv(csv_path, sep="|", header=None, quoting=3,
                               names=["fname", "raw_text", "normalized_text"])
        return metadata

    def _get_example(self, metadatum):
        """All the code for generating an Example from a metadatum. If you want a
        different preprocessing pipeline, you can override this method.
        This method may require several processors, each of which has a lot of options.
        In that case, you'd better build a composed transform and pass it to the init
        method.
        """

        fname, raw_text, normalized_text = metadatum
        wav_path = self.root.joinpath("wavs", fname + ".wav")

        # load -> trim -> preemphasis -> stft -> magnitude -> mel_scale -> logscale -> normalize
        wav, sample_rate = librosa.load(wav_path, sr=None)  # we would rather use a functor to hold its parameters
        trimed, _ = librosa.effects.trim(wav)
        preemphasized = librosa.effects.preemphasis(trimed)
        D = librosa.stft(preemphasized)
        mag, phase = librosa.magphase(D)
        mel = librosa.feature.melspectrogram(S=mag)

        mag = librosa.amplitude_to_db(S=mag)
        mel = librosa.amplitude_to_db(S=mel)

        ref_db = 20
        max_db = 100
        mel = np.clip((mel - ref_db + max_db) / max_db, 1e-8, 1)
        mag = np.clip((mag - ref_db + max_db) / max_db, 1e-8, 1)

        phonemes = np.array(g2p.en.text_to_sequence(normalized_text), dtype=np.int64)
        return (mag, mel, phonemes)  # maybe we need to implement it as a map in the future

    def _batch_examples(self, minibatch):
        mag_batch = []
        mel_batch = []
        phoneme_batch = []
        for example in minibatch:
            mag, mel, phoneme = example
            mag_batch.append(mag)
            mel_batch.append(mel)
            phoneme_batch.append(phoneme)
        mag_batch = SpecBatcher(pad_value=0.)(mag_batch)
        mel_batch = SpecBatcher(pad_value=0.)(mel_batch)
        phoneme_batch = TextIDBatcher(pad_id=0)(phoneme_batch)
        return (mag_batch, mel_batch, phoneme_batch)

    def __getitem__(self, index):
        metadatum = self.metadata.iloc[index]
        example = self._get_example(metadatum)
        return example

    def __iter__(self):
        for i in range(len(self)):
            yield self[i]

    def __len__(self):
        return len(self.metadata)
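To make the dB normalization at the end of `_get_example` concrete, here is a small numeric sketch. The sample dB values are made up; `ref_db = 20` and `max_db = 100` are the constants from the code above.

```python
import numpy as np

ref_db, max_db = 20, 100
# example dB values (librosa.amplitude_to_db returns negative dB for magnitudes below the reference)
db_values = np.array([-100.0, -60.0, -20.0, 0.0])

normalized = np.clip((db_values - ref_db + max_db) / max_db, 1e-8, 1)
print(normalized)  # approximately [1e-08, 0.2, 0.6, 0.8]
# i.e. very quiet bins hit the 1e-8 floor, 0 dB maps to 0.8,
# and anything at or above ref_db saturates at 1.
```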
@@ -0,0 +1,81 @@
from pathlib import Path
import pandas as pd
from ruamel.yaml import YAML
import io

import librosa
import numpy as np

from parakeet.g2p.en import text_to_sequence
from parakeet.data.dataset import Dataset
from parakeet.data.datacargo import DataCargo
from parakeet.data.batch import TextIDBatcher, WavBatcher


class VCTK(Dataset):
    def __init__(self, root):
        assert isinstance(root, (str, Path)), "root should be a string or Path object"
        self.root = root if isinstance(root, Path) else Path(root)
        self.text_root = self.root.joinpath("txt")
        self.wav_root = self.root.joinpath("wav48")

        if not (self.root.joinpath("metadata.csv").exists() and
                self.root.joinpath("speaker_indices.yaml").exists()):
            self._prepare_metadata()
        self.speaker_indices, self.metadata = self._load_metadata()

    def _load_metadata(self):
        yaml = YAML(typ='safe')
        speaker_indices = yaml.load(self.root.joinpath("speaker_indices.yaml"))
        metadata = pd.read_csv(self.root.joinpath("metadata.csv"),
                               sep="|", quoting=3, header=1)
        return speaker_indices, metadata

    def _prepare_metadata(self):
        metadata = []
        speaker_to_index = {}
        for i, speaker_folder in enumerate(self.text_root.iterdir()):
            if speaker_folder.is_dir():
                speaker_to_index[speaker_folder.name] = i
                for text_file in speaker_folder.iterdir():
                    if text_file.is_file():
                        with io.open(str(text_file)) as f:
                            transcription = f.read().strip()
                        wav_file = text_file.with_suffix(".wav")
                        metadata.append((wav_file.name, speaker_folder.name, transcription))
        metadata = pd.DataFrame.from_records(metadata,
                                             columns=["wave_file", "speaker", "text"])

        # save them
        yaml = YAML(typ='safe')
        yaml.dump(speaker_to_index, self.root.joinpath("speaker_indices.yaml"))
        metadata.to_csv(self.root.joinpath("metadata.csv"),
                        sep="|", quoting=3, index=False)

    def _get_example(self, metadatum):
        wave_file, speaker, text = metadatum
        wav_path = self.wav_root.joinpath(speaker, wave_file)
        wav, sr = librosa.load(str(wav_path), sr=None)
        phoneme_seq = np.array(text_to_sequence(text))
        return wav, self.speaker_indices[speaker], phoneme_seq

    def __getitem__(self, index):
        metadatum = self.metadata.iloc[index]
        example = self._get_example(metadatum)
        return example

    def __len__(self):
        return len(self.metadata)

    def _batch_examples(self, minibatch):
        wav_batch, speaker_batch, phoneme_batch = [], [], []
        for example in minibatch:
            wav, speaker_id, phoneme_seq = example
            wav_batch.append(wav)
            speaker_batch.append(speaker_id)
            phoneme_batch.append(phoneme_seq)
        wav_batch = WavBatcher(pad_value=0.)(wav_batch)
        speaker_batch = np.array(speaker_batch)
        phoneme_batch = TextIDBatcher(pad_id=0)(phoneme_batch)
        return wav_batch, speaker_batch, phoneme_batch
@@ -1,4 +1,5 @@
# coding: utf-8

"""Text processing frontend

All frontend module should have the following functions:

@@ -24,8 +25,7 @@ try:
except ImportError:
    ko = None

# if you are going to use the frontend, you need to modify _characters in
# symbol.py:
# if you are going to use the frontend, you need to modify _characters in symbol.py:
# _characters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!\'(),-.:;? ' + '¡¿ñáéíóúÁÉÍÓÚÑ'
try:
    from . import es
@@ -1,5 +1,7 @@
# coding: utf-8
from modules.frontend.text.symbols import symbols

from ..text.symbols import symbols
from ..text import sequence_to_text

import nltk
from random import random

@@ -27,9 +29,9 @@ def mix_pronunciation(text, p):
def text_to_sequence(text, p=0.0):
    if p >= 0:
        text = mix_pronunciation(text, p)
    from modules.frontend.text import text_to_sequence
    from ..text import text_to_sequence
    text = text_to_sequence(text, ["english_cleaners"])
    return text


from modules.frontend.text import sequence_to_text
@@ -1,5 +1,6 @@
# coding: utf-8
from deepvoice3_paddle.frontend.text.symbols import symbols
from ..text.symbols import symbols
from ..text import sequence_to_text

import nltk
from random import random

@@ -8,9 +9,9 @@ n_vocab = len(symbols)


def text_to_sequence(text, p=0.0):
    from deepvoice3_paddle.frontend.text import text_to_sequence
    from ..text import text_to_sequence
    text = text_to_sequence(text, ["basic_cleaners"])
    return text


from deepvoice3_paddle.frontend.text import sequence_to_text
@@ -1,5 +1,6 @@
# coding: utf-8


import MeCab
import jaconv
from random import random

@@ -29,9 +30,9 @@ def _yomi(mecab_result):


def _mix_pronunciation(tokens, yomis, p):
    return "".join(yomis[idx]
                   if yomis[idx] is not None and random() < p else tokens[idx]
                   for idx in range(len(tokens)))
    return "".join(
        yomis[idx] if yomis[idx] is not None and random() < p else tokens[idx]
        for idx in range(len(tokens)))


def mix_pronunciation(text, p):

@@ -58,7 +59,8 @@ def normalize_delimitor(text):


def text_to_sequence(text, p=0.0):
    for c in [" ", " ", "「", "」", "『", "』", "・", "【", "】", "(", ")", "(", ")"]:
    for c in [" ", " ", "「", "」", "『", "』", "・", "【", "】",
              "(", ")", "(", ")"]:
        text = text.replace(c, "")
    text = text.replace("!", "!")
    text = text.replace("?", "?")
@@ -1,5 +1,6 @@
# coding: utf-8


from random import random

n_vocab = 0xffff

@@ -12,6 +13,5 @@ _tagger = None
def text_to_sequence(text, p=0.0):
    return [ord(c) for c in text] + [_eos]  # EOS


def sequence_to_text(seq):
    return "".join(chr(n) for n in seq)
@@ -2,6 +2,7 @@ import re
from . import cleaners
from .symbols import symbols


# Mappings from symbol to numeric ID and vice versa:
_symbol_to_id = {s: i for i, s in enumerate(symbols)}
_id_to_symbol = {i: s for i, s in enumerate(symbols)}
@@ -1,46 +1,44 @@
'''
Cleaners are transformations that run over the input text at both training and
eval time.
Cleaners are transformations that run over the input text at both training and eval time.

Cleaners can be selected by passing a comma-delimited list of cleaner names as
the "cleaners" hyperparameter. Some cleaners are English-specific. You'll
typically want to use:
Cleaners can be selected by passing a comma-delimited list of cleaner names as the "cleaners"
hyperparameter. Some cleaners are English-specific. You'll typically want to use:
1. "english_cleaners" for English text
2. "transliteration_cleaners" for non-English text that can be transliterated
to ASCII using the Unidecode library (https://pypi.python.org/pypi/Unidecode)
3. "basic_cleaners" if you do not want to transliterate (in this case, you
should also update the symbols in symbols.py to match your data).
2. "transliteration_cleaners" for non-English text that can be transliterated to ASCII using
the Unidecode library (https://pypi.python.org/pypi/Unidecode)
3. "basic_cleaners" if you do not want to transliterate (in this case, you should also update
the symbols in symbols.py to match your data).
'''

import re
from unidecode import unidecode
from .numbers import normalize_numbers


# Regular expression matching whitespace:
_whitespace_re = re.compile(r'\s+')

# List of (regular expression, replacement) pairs for abbreviations:
_abbreviations = [(re.compile('\\b%s\\.' % x[0], re.IGNORECASE), x[1])
                  for x in [
                      ('mrs', 'misess'),
                      ('mr', 'mister'),
                      ('dr', 'doctor'),
                      ('st', 'saint'),
                      ('co', 'company'),
                      ('jr', 'junior'),
                      ('maj', 'major'),
                      ('gen', 'general'),
                      ('drs', 'doctors'),
                      ('rev', 'reverend'),
                      ('lt', 'lieutenant'),
                      ('hon', 'honorable'),
                      ('sgt', 'sergeant'),
                      ('capt', 'captain'),
                      ('esq', 'esquire'),
                      ('ltd', 'limited'),
                      ('col', 'colonel'),
                      ('ft', 'fort'),
                  ]]
_abbreviations = [(re.compile('\\b%s\\.' % x[0], re.IGNORECASE), x[1]) for x in [
    ('mrs', 'misess'),
    ('mr', 'mister'),
    ('dr', 'doctor'),
    ('st', 'saint'),
    ('co', 'company'),
    ('jr', 'junior'),
    ('maj', 'major'),
    ('gen', 'general'),
    ('drs', 'doctors'),
    ('rev', 'reverend'),
    ('lt', 'lieutenant'),
    ('hon', 'honorable'),
    ('sgt', 'sergeant'),
    ('capt', 'captain'),
    ('esq', 'esquire'),
    ('ltd', 'limited'),
    ('col', 'colonel'),
    ('ft', 'fort'),
]]


def expand_abbreviations(text):

@@ -74,10 +72,7 @@ def add_punctuation(text):


def basic_cleaners(text):
    '''
    Basic pipeline that lowercases and collapses whitespace without
    transliteration.
    '''
    '''Basic pipeline that lowercases and collapses whitespace without transliteration.'''
    text = lowercase(text)
    text = collapse_whitespace(text)
    return text

@@ -92,9 +87,7 @@ def transliteration_cleaners(text):


def english_cleaners(text):
    '''
    Pipeline for English text, including number and abbreviation expansion.
    '''
    '''Pipeline for English text, including number and abbreviation expansion.'''
    text = convert_to_ascii(text)
    text = add_punctuation(text)
    text = lowercase(text)
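As a quick illustration of how the cleaner pipelines reformatted here are selected by name, a hedged sketch follows. The sample sentence is made up; passing a list of cleaner names to `text_to_sequence` is the pattern used by the g2p frontends in this merge request, and the module path `parakeet.g2p.text` is inferred from their relative imports.

```python
from parakeet.g2p.text import text_to_sequence

# "english_cleaners" lowercases, expands abbreviations and numbers, and collapses
# whitespace before the cleaned text is mapped to symbol ids.
seq = text_to_sequence("Dr. Smith paid $16 on main street.", ["english_cleaners"])
print(seq)  # a list of integer symbol ids for the cleaned text
```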
@@ -1,24 +1,21 @@
import re


valid_symbols = [
    'AA', 'AA0', 'AA1', 'AA2', 'AE', 'AE0', 'AE1', 'AE2', 'AH', 'AH0', 'AH1',
    'AH2', 'AO', 'AO0', 'AO1', 'AO2', 'AW', 'AW0', 'AW1', 'AW2', 'AY', 'AY0',
    'AY1', 'AY2', 'B', 'CH', 'D', 'DH', 'EH', 'EH0', 'EH1', 'EH2', 'ER', 'ER0',
    'ER1', 'ER2', 'EY', 'EY0', 'EY1', 'EY2', 'F', 'G', 'HH', 'IH', 'IH0', 'IH1',
    'IH2', 'IY', 'IY0', 'IY1', 'IY2', 'JH', 'K', 'L', 'M', 'N', 'NG', 'OW',
    'OW0', 'OW1', 'OW2', 'OY', 'OY0', 'OY1', 'OY2', 'P', 'R', 'S', 'SH', 'T',
    'TH', 'UH', 'UH0', 'UH1', 'UH2', 'UW', 'UW0', 'UW1', 'UW2', 'V', 'W', 'Y',
    'Z', 'ZH'
    'AA', 'AA0', 'AA1', 'AA2', 'AE', 'AE0', 'AE1', 'AE2', 'AH', 'AH0', 'AH1', 'AH2',
    'AO', 'AO0', 'AO1', 'AO2', 'AW', 'AW0', 'AW1', 'AW2', 'AY', 'AY0', 'AY1', 'AY2',
    'B', 'CH', 'D', 'DH', 'EH', 'EH0', 'EH1', 'EH2', 'ER', 'ER0', 'ER1', 'ER2', 'EY',
    'EY0', 'EY1', 'EY2', 'F', 'G', 'HH', 'IH', 'IH0', 'IH1', 'IH2', 'IY', 'IY0', 'IY1',
    'IY2', 'JH', 'K', 'L', 'M', 'N', 'NG', 'OW', 'OW0', 'OW1', 'OW2', 'OY', 'OY0',
    'OY1', 'OY2', 'P', 'R', 'S', 'SH', 'T', 'TH', 'UH', 'UH0', 'UH1', 'UH2', 'UW',
    'UW0', 'UW1', 'UW2', 'V', 'W', 'Y', 'Z', 'ZH'
]

_valid_symbol_set = set(valid_symbols)


class CMUDict:
    '''
    Thin wrapper around CMUDict data.
    http://www.speech.cs.cmu.edu/cgi-bin/cmudict
    '''
    '''Thin wrapper around CMUDict data. http://www.speech.cs.cmu.edu/cgi-bin/cmudict'''

    def __init__(self, file_or_path, keep_ambiguous=True):
        if isinstance(file_or_path, str):

@@ -27,10 +24,7 @@ class CMUDict:
        else:
            entries = _parse_cmudict(file_or_path)
        if not keep_ambiguous:
            entries = {
                word: pron
                for word, pron in entries.items() if len(pron) == 1
            }
            entries = {word: pron for word, pron in entries.items() if len(pron) == 1}
        self._entries = entries

    def __len__(self):
@@ -3,6 +3,7 @@
import inflect
import re


_inflect = inflect.engine()
_comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])')
_decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)')

@@ -55,8 +56,7 @@ def _expand_number(m):
        elif num % 100 == 0:
            return _inflect.number_to_words(num // 100) + ' hundred'
        else:
            return _inflect.number_to_words(
                num, andword='', zero='oh', group=2).replace(', ', ' ')
            return _inflect.number_to_words(num, andword='', zero='oh', group=2).replace(', ', ' ')
    else:
        return _inflect.number_to_words(num, andword='')
@@ -1,9 +1,8 @@
'''
Defines the set of symbols used in text input to the model.

The default is a set of ASCII characters that works well for English or text
that has been run through Unidecode. For other data, you can modify _characters.
See TRAINING_DATA.md for details.
The default is a set of ASCII characters that works well for English or text that has been run
through Unidecode. For other data, you can modify _characters. See TRAINING_DATA.md for details.
'''
from .cmudict import valid_symbols
(image file: 447 KiB before, 447 KiB after)
@@ -9,12 +9,12 @@ import argparse
import sys
import io
import numpy as np
sys.path.append("../")
# sys.path.append("../")
from hparams import hparams, hparams_debug_string
from data.data import TextDataSource, MelSpecDataSource
from data import TextDataSource, MelSpecDataSource
from nnmnkwii.datasets import FileSourceDataset
from tqdm import trange
from modules import frontend
from parakeet import g2p as frontend


def build_parser():

@@ -25,7 +25,7 @@ import random

# import global hyper parameters
from hparams import hparams
from modules import frontend
from parakeet import g2p as frontend
import builder

_frontend = getattr(frontend, hparams.frontend)
@@ -20,10 +20,10 @@ import paddle.fluid.dygraph as dg

import numpy as np

from modules import conv
from parakeet.modules import conv

from modules.modules import Embedding, PositionEmbedding
from modules.modules import FC, Conv1D, Conv1DGLU, Conv1DTranspose
from parakeet.modules.modules import Embedding, PositionEmbedding
from parakeet.modules.modules import FC, Conv1D, Conv1DGLU, Conv1DTranspose

ConvSpec = namedtuple("ConvSpec", ["out_channels", "filter_size", "dilation"])
WindowRange = namedtuple("WindowRange", ["backward", "ahead"])

@@ -17,7 +17,7 @@ from paddle import fluid
import paddle.fluid.dygraph as dg

from hparams import hparams, hparams_debug_string
from modules import frontend
from parakeet import g2p as frontend
from deepvoice3 import DeepVoiceTTS


@@ -37,7 +37,7 @@ from tensorboardX import SummaryWriter

# import global hyper parameters
from hparams import hparams
from modules import frontend
from parakeet import g2p as frontend

_frontend = getattr(frontend, hparams.frontend)
@@ -28,9 +28,9 @@ import nltk
from paddle import fluid
import paddle.fluid.dygraph as dg

sys.path.append("../")
# sys.path.append("../")
import audio
from modules import frontend
from parakeet import g2p as frontend
import dry_run

from hparams import hparams

@@ -23,20 +23,20 @@ from paddle import fluid
import paddle.fluid.dygraph as dg

import sys
sys.path.append("../")
# sys.path.append("../")
from argparse import ArgumentParser
from hparams import hparams, hparams_debug_string

from nnmnkwii.datasets import FileSourceDataset
from data.data import (TextDataSource, MelSpecDataSource,
from data import (TextDataSource, MelSpecDataSource,
                  LinearSpecDataSource,
                  PartialyRandomizedSimilarTimeLengthSampler,
                  Dataset, make_loader, create_batch)
from modules import frontend
from parakeet import g2p as frontend
from builder import deepvoice3, WindowRange
from dry_run import dry_run
from train_model import train_model
from modules.loss import TTSLoss
from parakeet.modules.loss import TTSLoss
from tensorboardX import SummaryWriter
@@ -19,7 +19,7 @@ import paddle
from paddle import fluid
import paddle.fluid.dygraph as dg

from weight_norm import Conv2D, Conv2DTranspose
from .weight_norm import Conv2D, Conv2DTranspose


class Conv1D(dg.Layer):

@@ -18,8 +18,8 @@ import paddle.fluid.dygraph as dg

import numpy as np

import conv
import weight_norm as weight_norm
from . import conv
from . import weight_norm


def FC(name_scope,
@@ -0,0 +1,48 @@
import os
import io
import re
from setuptools import setup, find_packages


def read(*names, **kwargs):
    with io.open(
            os.path.join(os.path.dirname(__file__), *names),
            encoding=kwargs.get("encoding", "utf8")
    ) as fp:
        return fp.read()


def find_version(*file_paths):
    version_file = read(*file_paths)
    version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]",
                              version_file, re.M)
    if version_match:
        return version_match.group(1)
    raise RuntimeError("Unable to find version string.")


VERSION = find_version('parakeet', '__init__.py')
long_description = read('README.md')

setup_info = dict(
    # Metadata
    name='parakeet',
    version=VERSION,
    author='PaddleSL Team',
    author_email='',
    url='https://github.com/PaddlePaddle',
    description='Speech synthesis tools and models based on Paddlepaddle',
    long_description=long_description,
    license='Apache 2',

    install_requires=[
        'numpy', 'nltk', 'inflect', 'librosa', 'unidecode', 'numba',
        'tqdm', 'matplotlib', 'tensorboardX', 'tensorboard', 'scipy',
        'ruamel.yaml', 'pandas', 'sox',
    ],

    # Package info
    packages=find_packages(exclude=('tests', 'tests.*')),

    zip_safe=True,
)

setup(**setup_info)
@@ -0,0 +1,10 @@
from parakeet.datasets.ljspeech import LJSpeech
from parakeet.data.datacargo import DataCargo

from pathlib import Path

LJSPEECH_ROOT = Path("/workspace/datasets/LJSpeech-1.1")
ljspeech = LJSpeech(LJSPEECH_ROOT)
ljspeech_cargo = DataCargo(ljspeech, batch_size=16, shuffle=True)
for i, batch in enumerate(ljspeech_cargo):
    print(i)

@@ -0,0 +1,11 @@
from parakeet.datasets import vctk
from pathlib import Path
from parakeet.data.datacargo import DataCargo

root = Path("/workspace/datasets/VCTK-Corpus")
vctk_dataset = vctk.VCTK(root)
vctk_cargo = DataCargo(vctk_dataset, batch_size=16, shuffle=True, drop_last=True)

for i, batch in enumerate(vctk_cargo):
    print(i)