update docstring of tacotron2

lfchener 2020-12-18 15:50:05 +08:00
parent ecdeb14a40
commit 6b8573898a
1 changed file with 37 additions and 42 deletions


@@ -27,19 +27,18 @@ __all__ = ["Tacotron2", "Tacotron2Loss"]
 class DecoderPreNet(nn.Layer):
-    """
-    Decoder prenet module for Tacotron2.
+    """Decoder prenet module for Tacotron2.
     Parameters
     ----------
     d_input: int
-        input dimension
+        input feature size
     d_hidden: int
         hidden size
     d_output: int
-        output Dimension
+        output feature size
     dropout_rate: float
         droput probability
@@ -62,12 +61,12 @@ class DecoderPreNet(nn.Layer):
         Parameters
         ----------
-        x: Tensor[shape=(B, T_mel, C)]
+        x: Tensor [shape=(B, T_mel, C)]
             batch of the sequences of padded mel spectrogram
         Returns
         -------
-        output: Tensor[shape=(B, T_mel, C)]
+        output: Tensor [shape=(B, T_mel, C)]
             batch of the sequences of padded hidden state
         """
@@ -78,8 +77,7 @@ class DecoderPreNet(nn.Layer):
 class DecoderPostNet(nn.Layer):
-    """
-    Decoder postnet module for Tacotron2.
+    """Decoder postnet module for Tacotron2.
     Parameters
     ----------
@@ -151,12 +149,12 @@ class DecoderPostNet(nn.Layer):
         Parameters
         ----------
-        input: Tensor[shape=(B, T_mel, C)]
+        input: Tensor [shape=(B, T_mel, C)]
             output sequence of features from decoder
         Returns
         -------
-        output: Tensor[shape=(B, T_mel, C)]
+        output: Tensor [shape=(B, T_mel, C)]
             output sequence of features after postnet
         """
@@ -170,8 +168,7 @@ class DecoderPostNet(nn.Layer):
 class Tacotron2Encoder(nn.Layer):
-    """
-    Tacotron2 encoder module for Tacotron2.
+    """Tacotron2 encoder module for Tacotron2.
     Parameters
     ----------
@@ -218,15 +215,15 @@ class Tacotron2Encoder(nn.Layer):
         Parameters
         ----------
-        x: Tensor[shape=(B, T)]
+        x: Tensor [shape=(B, T)]
             batch of the sequencees of padded character ids
-        text_lens: Tensor[shape=(B,)]
+        text_lens: Tensor [shape=(B,)]
             batch of lengths of each text input batch.
         Returns
         -------
-        output : Tensor[shape=(B, T, C)]
+        output : Tensor [shape=(B, T, C)]
             batch of the sequences of padded hidden states
         """
@@ -239,8 +236,7 @@ class Tacotron2Encoder(nn.Layer):
 class Tacotron2Decoder(nn.Layer):
-    """
-    Tacotron2 decoder module for Tacotron2.
+    """Tacotron2 decoder module for Tacotron2.
     Parameters
     ----------
@@ -278,7 +274,8 @@ class Tacotron2Decoder(nn.Layer):
         droput probability in location sensitive attention
     p_decoder_dropout: float
-        droput probability in decoder"""
+        droput probability in decoder
+    """
     def __init__(self,
                  d_mels: int,
@@ -396,15 +393,14 @@ class Tacotron2Decoder(nn.Layer):
         Returns
         -------
-        mel_output: Tensor[shape=(B, T_mel, C)]
+        mel_output: Tensor [shape=(B, T_mel, C)]
             output sequence of features
-        stop_logits: Tensor[shape=(B, T_mel)]
+        stop_logits: Tensor [shape=(B, T_mel)]
             output sequence of stop logits
-        alignments: Tensor[shape=(B, T_mel, T_text)]
+        alignments: Tensor [shape=(B, T_mel, T_text)]
             attention weights
         """
         querys = paddle.reshape(
             querys,
@@ -441,7 +437,7 @@ class Tacotron2Decoder(nn.Layer):
         Parameters
         ----------
-        keys: Tensor[shape=(B, T_text, C)]
+        keys: Tensor [shape=(B, T_text, C)]
             batch of the sequences of padded output from encoder
         stop_threshold: float
@@ -452,13 +448,13 @@ class Tacotron2Decoder(nn.Layer):
         Returns
         -------
-        mel_output: Tensor[shape=(B, T_mel, C)]
+        mel_output: Tensor [shape=(B, T_mel, C)]
             output sequence of features
-        stop_logits: Tensor[shape=(B, T_mel)]
+        stop_logits: Tensor [shape=(B, T_mel)]
             output sequence of stop logits
-        alignments: Tensor[shape=(B, T_mel, T_text)]
+        alignments: Tensor [shape=(B, T_mel, T_text)]
             attention weights
         """
@@ -494,12 +490,11 @@ class Tacotron2Decoder(nn.Layer):
 class Tacotron2(nn.Layer):
-    """
-    Tacotron2 model for end-to-end text-to-speech (E2E-TTS).
+    """Tacotron2 model for end-to-end text-to-speech (E2E-TTS).
     This is a model of Spectrogram prediction network in Tacotron2 described
     in `Natural TTS Synthesis
-    by Conditioning WaveNet on Mel Spectrogram Predictions`,
+    by Conditioning WaveNet on Mel Spectrogram Predictions`_,
     which converts the sequence of characters
     into the sequence of mel spectrogram.
@@ -620,16 +615,16 @@ class Tacotron2(nn.Layer):
         Parameters
         ----------
-        text_inputs: Tensor[shape=(B, T_text)]
+        text_inputs: Tensor [shape=(B, T_text)]
             batch of the sequencees of padded character ids
-        mels: Tensor[shape(B, T_mel, C)]
+        mels: Tensor [shape(B, T_mel, C)]
             batch of the sequences of padded mel spectrogram
-        text_lens: Tensor[shape=(B,)]
+        text_lens: Tensor [shape=(B,)]
             batch of lengths of each text input batch.
-        output_lens: Tensor[shape=(B,)]
+        output_lens: Tensor [shape=(B,)]
             batch of lengths of each mels batch.
         Returns
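
The forward-pass inputs documented above can be sanity-checked with a small sketch; the sizes below are made up, and the Tacotron2 instance is assumed to be constructed elsewhere:

    import paddle

    B, T_text, T_mel, C = 2, 12, 180, 80
    text_inputs = paddle.randint(0, 37, shape=[B, T_text], dtype='int64')  # padded character ids
    mels = paddle.randn([B, T_mel, C])                                     # padded mel spectrograms
    text_lens = paddle.to_tensor([12, 9], dtype='int64')                   # length of each text
    output_lens = paddle.to_tensor([180, 150], dtype='int64')              # length of each mel sequence

    # With an already constructed Tacotron2 instance `model`, the documented call is:
    #     outputs = model(text_inputs, mels, text_lens, output_lens)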
@@ -679,7 +674,7 @@ class Tacotron2(nn.Layer):
         Parameters
         ----------
-        text_inputs: Tensor[shape=(B, T_text)]
+        text_inputs: Tensor [shape=(B, T_text)]
             batch of the sequencees of padded character ids
         stop_threshold: float
@@ -765,10 +760,10 @@ class Tacotron2(nn.Layer):
         Returns
         -------
-        mel_outputs_postnet: Tensor[shape=(T_mel, C)]
+        mel_outputs_postnet: Tensor [shape=(T_mel, C)]
             output sequence of sepctrogram after postnet
-        alignments: Tensor[shape=(T_mel, T_text)]
+        alignments: Tensor [shape=(T_mel, T_text)]
             attention weights
         """
         model = cls(frontend,
@@ -809,24 +804,24 @@ class Tacotron2Loss(nn.Layer):
         Parameters
         ----------
-        mel_outputs: Tensor[shape=(B, T_mel, C)]
+        mel_outputs: Tensor [shape=(B, T_mel, C)]
             output mel spectrogram sequence
-        mel_outputs_postnet: Tensor[shape(B, T_mel, C)]
+        mel_outputs_postnet: Tensor [shape(B, T_mel, C)]
             output mel spectrogram sequence after postnet
-        stop_logits: Tensor[shape=(B, T_mel)]
+        stop_logits: Tensor [shape=(B, T_mel)]
             output sequence of stop logits befor sigmoid
-        mel_targets: Tensor[shape=(B,)]
+        mel_targets: Tensor [shape=(B, T_mel, C)]
             target mel spectrogram sequence
-        stop_tokens:
+        stop_tokens: Tensor [shape=(B,)]
             target stop token
         Returns
         -------
-        losses : Dict[str, float]
+        losses : Dict[str, Tensor]
             loss: the sum of the other three losses
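
To make the documented loss interface concrete, a minimal sketch with illustrative sizes; the Tacotron2Loss constructor is not shown in this diff, so its instantiation is left as a comment:

    import paddle

    B, T_mel, C = 2, 180, 80
    mel_outputs = paddle.randn([B, T_mel, C])          # decoder output
    mel_outputs_postnet = paddle.randn([B, T_mel, C])  # decoder output refined by the postnet
    stop_logits = paddle.randn([B, T_mel])             # stop logits before sigmoid
    mel_targets = paddle.randn([B, T_mel, C])          # target mel spectrograms
    stop_tokens = paddle.zeros([B])                    # target stop token, shape (B,) as documented

    # Assuming a constructed Tacotron2Loss instance `criterion`:
    #     losses = criterion(mel_outputs, mel_outputs_postnet, stop_logits,
    #                        mel_targets, stop_tokens)
    #     losses["loss"]  # the sum of the other loss entries, a Tensor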