2020-10-10 15:51:54 +08:00
|
|
|
|
|
|
|
"""
|
|
|
|
This module contains normalizers for spectrogram magnitude.
|
|
|
|
Normalizers are invertible transformations. They can be used to process
|
|
|
|
magnitude of spectrogram before training and can also be used to recover from
|
|
|
|
the generated spectrogram so as to be used with vocoders like Griffin-Lim.
|
|
|
|
|
|
|
|
The base class describes the interface. `transform` is used to perform
|
|
|
|
transformation and `inverse` is used to perform the inverse transformation.
|
2020-10-22 13:04:45 +08:00
|
|
|
|
|
|
|
check issues:
|
|
|
|
https://github.com/mozilla/TTS/issues/377
|
2020-10-10 15:51:54 +08:00
|
|
|
"""
|
|
|
|
import numpy as np
|
|
|
|
|
2020-12-09 15:58:39 +08:00
|
|
|
# Names exported by ``from <module> import *``.
__all__ = [
    "NormalizerBase",
    "LogMagnitude",
    "UnitMagnitude",
]
|
|
|
|
|
|
|
|
|
2020-10-10 15:51:54 +08:00
|
|
|
class NormalizerBase(object):
    """Interface shared by the spectrogram magnitude normalizers.

    Subclasses are expected to override both methods; the versions defined
    here only raise ``NotImplementedError``.
    """

    def transform(self, spec):
        """Apply the forward transformation to ``spec``.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError("transform must be implemented")

    def inverse(self, normalized):
        """Apply the inverse transformation to ``normalized``.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError("inverse must be implemented")
|
|
|
|
|
|
|
|
class LogMagnitude(NormalizerBase):
    """Natural-logarithm normalizer for spectrogram magnitude.

    This is a simple normalizer used in Waveglow, Waveflow, tacotron2...

    Args:
        min: Floor applied elementwise to the input before the logarithm
            is taken.
    """

    def __init__(self, min=1e-7):
        self.min = min

    def transform(self, x):
        """Return ``log(max(x, self.min))`` computed elementwise."""
        return np.log(np.maximum(x, self.min))

    def inverse(self, x):
        """Return ``exp(x)``, which undoes ``transform`` for unfloored values."""
        return np.exp(x)
|
2020-10-22 13:04:45 +08:00
|
|
|
|
|
|
|
|
2020-10-10 15:51:54 +08:00
|
|
|
class UnitMagnitude(NormalizerBase):
    """Decibel scaling followed by normalization into the range [0, 1].

    Args:
        min: Floor applied elementwise to the input before the logarithm
            is taken.
    """

    def __init__(self, min=1e-5):
        self.min = min

    def transform(self, x):
        """Convert ``x`` to a shifted dB scale, rescale it and clip to [0, 1]."""
        floored = np.maximum(self.min, x)
        decibels = 20 * np.log10(floored) - 20
        # Affine map sending -100 dB to 0 and 0 dB to 1, prior to clipping.
        scaled = (decibels + 100) / 100
        return np.clip(scaled, 0, 1)

    def inverse(self, x):
        """Map normalized values back to magnitude; ``x`` is clipped to [0, 1]."""
        decibels = np.clip(x, 0, 1) * 100 - 100
        # exp(a * ln(10)) equals 10 ** a.
        exponent = (decibels + 20) / 20 * np.log(10)
        return np.exp(exponent)
|