format docstrings

This commit is contained in:
TianYuan 2021-07-13 07:55:56 +00:00
parent 3af3c29a94
commit 6553d1d723
15 changed files with 597 additions and 371 deletions

View File

@ -12,28 +12,26 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Fastspeech2 related modules for paddle"""
import logging
import numpy as np
from typing import Dict
from typing import Sequence
from typing import Tuple
from typeguard import check_argument_types
import paddle
import numpy as np
from paddle import nn
from parakeet.modules.fastspeech2_predictor.duration_predictor import DurationPredictor
from parakeet.modules.fastspeech2_predictor.duration_predictor import DurationPredictorLoss
from parakeet.modules.fastspeech2_predictor.variance_predictor import VariancePredictor
from parakeet.modules.fastspeech2_predictor.length_regulator import LengthRegulator
from parakeet.modules.fastspeech2_predictor.postnet import Postnet
from parakeet.modules.nets_utils import make_non_pad_mask
from parakeet.modules.nets_utils import make_pad_mask
from parakeet.modules.fastspeech2_predictor.variance_predictor import VariancePredictor
from parakeet.modules.fastspeech2_transformer.embedding import PositionalEncoding
from parakeet.modules.fastspeech2_transformer.embedding import ScaledPositionalEncoding
from parakeet.modules.fastspeech2_transformer.encoder import Encoder as TransformerEncoder
from parakeet.modules.nets_utils import initialize
from parakeet.modules.nets_utils import make_non_pad_mask
from parakeet.modules.nets_utils import make_pad_mask
class FastSpeech2(nn.Layer):
@ -155,7 +153,6 @@ class FastSpeech2(nn.Layer):
positionwise_layer_type=positionwise_layer_type,
positionwise_conv_kernel_size=positionwise_conv_kernel_size, )
else:
print("encoder_type:", encoder_type)
raise ValueError(f"{encoder_type} is not supported.")
# define duration predictor
@ -236,6 +233,12 @@ class FastSpeech2(nn.Layer):
use_batch_norm=use_batch_norm,
dropout_rate=postnet_dropout_rate, ))
# initialize parameters
self._reset_parameters(
init_type=init_type,
init_enc_alpha=init_enc_alpha,
init_dec_alpha=init_dec_alpha, )
# define criterions
self.criterion = FastSpeech2Loss(
use_masking=use_masking, use_weighted_masking=use_weighted_masking)
@ -253,25 +256,37 @@ class FastSpeech2(nn.Layer):
energy: paddle.Tensor,
energy_lengths: paddle.Tensor, ) -> Tuple[paddle.Tensor, Dict[
str, paddle.Tensor], paddle.Tensor]:
# """Calculate forward propagation.
"""Calculate forward propagation.
# Args:
# text (LongTensor): Batch of padded token ids (B, Tmax).
# text_lengths (LongTensor): Batch of lengths of each input (B,).
# speech (Tensor): Batch of padded target features (B, Lmax, odim).
# speech_lengths (LongTensor): Batch of the lengths of each target (B,).
# durations (LongTensor): Batch of padded durations (B, Tmax + 1).
# durations_lengths (LongTensor): Batch of duration lengths (B, Tmax + 1).
# pitch (Tensor): Batch of padded token-averaged pitch (B, Tmax + 1, 1).
# pitch_lengths (LongTensor): Batch of pitch lengths (B, Tmax + 1).
# energy (Tensor): Batch of padded token-averaged energy (B, Tmax + 1, 1).
# energy_lengths (LongTensor): Batch of energy lengths (B, Tmax + 1).
# Returns:
# Tensor: Loss scalar value.
# Dict: Statistics to be monitored.
# Tensor: Weight value.
# """
Parameters
----------
text : LongTensor
Batch of padded token ids (B, Tmax).
text_lengths : LongTensor
Batch of lengths of each input (B,).
speech : Tensor
Batch of padded target features (B, Lmax, odim).
speech_lengths : LongTensor
Batch of the lengths of each target (B,).
durations : LongTensor
Batch of padded durations (B, Tmax + 1).
durations_lengths : LongTensor
Batch of duration lengths (B, Tmax + 1).
pitch : Tensor
Batch of padded token-averaged pitch (B, Tmax + 1, 1).
pitch_lengths : LongTensor
Batch of pitch lengths (B, Tmax + 1).
energy : Tensor
Batch of padded token-averaged energy (B, Tmax + 1, 1).
energy_lengths : LongTensor
Batch of energy lengths (B, Tmax + 1).
Returns
----------
Tensor
Loss scalar value.
Dict
Statistics to be monitored.
"""
text = text[:, :text_lengths.max()] # for data-parallel
speech = speech[:, :speech_lengths.max()] # for data-parallel
durations = durations[:, :durations_lengths.max()] # for data-parallel
@ -282,16 +297,11 @@ class FastSpeech2(nn.Layer):
# Add eos at the last of sequence
# xs = F.pad(text, [0, 1], "constant", self.padding_idx)
print("xs.shape in fastspeech2.py before:", text.shape, text)
xs = np.pad(text.numpy(),
pad_width=((0, 0), (0, 1)),
mode="constant",
constant_values=self.padding_idx)
xs = paddle.to_tensor(xs)
print("xs.shape in fastspeech2.py end:", xs.shape, xs)
# my_pad = nn.Pad1D(padding=[0, 1], mode="constant", value=self.padding_idx)
# xs = my_pad(text)
# Could indexing go out of bounds here? Can xs take index l? -> Yes, because the previous step appended a padding_idx, which then becomes the eos.
for i, l in enumerate(text_lengths):
xs[i, l] = self.eos
ilens = text_lengths + 1
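Aside: a minimal sketch of the eos-appending step above, using toy values (padding_idx=0 and eos=2 are illustrative assumptions):

import numpy as np

# toy batch of padded token ids with lengths [3, 2]
text = np.array([[5, 6, 7], [8, 9, 0]])
text_lengths = np.array([3, 2])
# append one extra padding column, then write eos right after the last real token
xs = np.pad(text, pad_width=((0, 0), (0, 1)), mode="constant", constant_values=0)
for i, l in enumerate(text_lengths):
    xs[i, l] = 2
# xs -> [[5, 6, 7, 2], [8, 9, 2, 0]]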
@ -302,23 +312,16 @@ class FastSpeech2(nn.Layer):
# forward propagation
before_outs, after_outs, d_outs, p_outs, e_outs = self._forward(
xs, ilens, ys, olens, ds, ps, es, is_inference=False)
print("d_outs in paddle:", d_outs)
print("p_outs in paddle:", p_outs)
print("e_outs in paddle:", e_outs)
# modify mod part of groundtruth
if self.reduction_factor > 1:
# TODO: this needs to be revised
olens = paddle.to_tensor([
olen - olen % self.reduction_factor for olen in olens.numpy()
])
max_olen = max(olens)
ys = ys[:, :max_olen]
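Aside: a quick numeric sketch of the reduction-factor adjustment above (reduction_factor=2 is an assumed value):

# target lengths are trimmed down to a multiple of the reduction factor
reduction_factor = 2
olens = [7, 5]
olens = [olen - olen % reduction_factor for olen in olens]
# olens -> [6, 4]; ys is then truncated to max(olens) = 6 frames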
# calculate loss
if self.postnet is None:
after_outs = None
# calculate loss
l1_loss, duration_loss, pitch_loss, energy_loss = self.criterion(
after_outs=after_outs,
@ -363,9 +366,8 @@ class FastSpeech2(nn.Layer):
alpha: float=1.0, ) -> Sequence[paddle.Tensor]:
# forward encoder
x_masks = self._source_mask(ilens)
print("xs.shape in fastspeech2.py:", xs.shape)
hs, _ = self.encoder(xs, x_masks)  # (B, Tmax, adim)
# forward duration predictor and variance predictors
d_masks = make_pad_mask(ilens)
@ -377,10 +379,11 @@ class FastSpeech2(nn.Layer):
e_outs = self.energy_predictor(hs.detach(), d_masks.unsqueeze(-1))
else:
e_outs = self.energy_predictor(hs, d_masks.unsqueeze(-1))
print("p_outs.shape:", p_outs.shape)
if is_inference:
d_outs = self.duration_predictor.inference(hs,
d_masks) # (B, Tmax)
# print("d_outs:",d_outs)
# use prediction in inference
# (B, Tmax, 1)
@ -404,7 +407,6 @@ class FastSpeech2(nn.Layer):
# forward decoder
if olens is not None and not is_inference:
if self.reduction_factor > 1:
# Converting olens to a paddle tensor directly adds an extra dimension, so convert through numpy first.
olens_in = paddle.to_tensor(
[olen // self.reduction_factor for olen in olens.numpy()])
else:
@ -412,9 +414,10 @@ class FastSpeech2(nn.Layer):
h_masks = self._source_mask(olens_in)
else:
h_masks = None
zs, _ = self.decoder(hs, h_masks) # (B, Lmax, adim)
before_outs = self.feat_out(zs).reshape(
(zs.shape[0], -1, self.odim)) # (B, Lmax, odim)
# (B, Lmax, adim)
zs, _ = self.decoder(hs, h_masks)
# (B, Lmax, odim)
before_outs = self.feat_out(zs).reshape((zs.shape[0], -1, self.odim))
# postnet -> (B, Lmax//r * r, odim)
if self.postnet is None:
@ -437,20 +440,30 @@ class FastSpeech2(nn.Layer):
paddle.Tensor, paddle.Tensor, paddle.Tensor]:
"""Generate the sequence of features given the sequences of characters.
Args:
text (LongTensor): Input sequence of characters (T,).
speech (Tensor, optional): Feature sequence to extract style (N, idim).
durations (LongTensor, optional): Groundtruth of duration (T + 1,).
pitch (Tensor, optional): Groundtruth of token-averaged pitch (T + 1, 1).
energy (Tensor, optional): Groundtruth of token-averaged energy (T + 1, 1).
alpha (float, optional): Alpha to control the speed.
use_teacher_forcing (bool, optional): Whether to use teacher forcing.
If true, groundtruth of duration, pitch and energy will be used.
Parameters
----------
text : LongTensor
Input sequence of characters (T,).
speech : Tensor, optional
Feature sequence to extract style (N, idim).
durations : LongTensor, optional
Groundtruth of duration (T + 1,).
pitch : Tensor, optional
Groundtruth of token-averaged pitch (T + 1, 1).
energy : Tensor, optional
Groundtruth of token-averaged energy (T + 1, 1).
alpha : float, optional
Alpha to control the speed.
use_teacher_forcing : bool, optional
Whether to use teacher forcing.
If true, groundtruth of duration, pitch and energy will be used.
Returns:
Tensor: Output sequence of features (L, odim).
None: Dummy for compatibility.
None: Dummy for compatibility.
Returns
----------
Tensor
Output sequence of features (L, odim).
None
Dummy for compatibility.
"""
x, y = text, speech
@ -460,13 +473,15 @@ class FastSpeech2(nn.Layer):
x = np.pad(text.numpy(),
pad_width=((0, 1)),
mode="constant",
constant_values=self.padding_idx)
constant_values=self.eos)
x = paddle.to_tensor(x)
# setup batch axis
ilens = paddle.to_tensor(
[x.shape[0]], dtype=paddle.int64, place=x.place)
xs, ys = x.unsqueeze(0), None
if y is not None:
ys = y.unsqueeze(0)
@ -493,14 +508,19 @@ class FastSpeech2(nn.Layer):
def _source_mask(self, ilens: paddle.Tensor) -> paddle.Tensor:
"""Make masks for self-attention.
Args:
ilens (LongTensor): Batch of lengths (B,).
Parameters
----------
ilens : LongTensor
Batch of lengths (B,).
Returns:
Tensor: Mask tensor for self-attention.
Returns
-------
Tensor
Mask tensor for self-attention.
dtype=paddle.bool
Examples:
Examples
-------
>>> ilens = [5, 3]
>>> self._source_mask(ilens)
tensor([[[1, 1, 1, 1, 1],
@ -510,6 +530,29 @@ class FastSpeech2(nn.Layer):
x_masks = make_non_pad_mask(ilens)
return x_masks.unsqueeze(-2)
def _reset_parameters(self,
init_type: str,
init_enc_alpha: float,
init_dec_alpha: float):
# initialize parameters
initialize(self, init_type)
# initialize alpha in scaled positional encoding
if self.encoder_type == "transformer" and self.use_scaled_pos_enc:
init_enc_alpha = paddle.to_tensor(init_enc_alpha)
self.encoder.embed[-1].alpha = paddle.create_parameter(
shape=init_enc_alpha.shape,
dtype=str(init_enc_alpha.numpy().dtype),
default_initializer=paddle.nn.initializer.Assign(
init_enc_alpha))
if self.decoder_type == "transformer" and self.use_scaled_pos_enc:
init_dec_alpha = paddle.to_tensor(init_dec_alpha)
self.decoder.embed[-1].alpha = paddle.create_parameter(
shape=init_dec_alpha.shape,
dtype=str(init_dec_alpha.numpy().dtype),
default_initializer=paddle.nn.initializer.Assign(
init_dec_alpha))
class FastSpeech2Loss(nn.Layer):
"""Loss function module for FastSpeech2."""
@ -519,12 +562,12 @@ class FastSpeech2Loss(nn.Layer):
use_weighted_masking: bool=False):
"""Initialize feed-forward Transformer loss module.
Args:
use_masking (bool):
Parameters
----------
use_masking : bool
Whether to apply masking for padded part in loss calculation.
use_weighted_masking (bool):
use_weighted_masking : bool
Whether to apply weighted masking in loss calculation.
"""
assert check_argument_types()
super().__init__()
@ -555,24 +598,41 @@ class FastSpeech2Loss(nn.Layer):
paddle.Tensor, paddle.Tensor]:
"""Calculate forward propagation.
Args:
after_outs (Tensor): Batch of outputs after postnets (B, Lmax, odim).
before_outs (Tensor): Batch of outputs before postnets (B, Lmax, odim).
d_outs (LongTensor): Batch of outputs of duration predictor (B, Tmax).
p_outs (Tensor): Batch of outputs of pitch predictor (B, Tmax, 1).
e_outs (Tensor): Batch of outputs of energy predictor (B, Tmax, 1).
ys (Tensor): Batch of target features (B, Lmax, odim).
ds (LongTensor): Batch of durations (B, Tmax).
ps (Tensor): Batch of target token-averaged pitch (B, Tmax, 1).
es (Tensor): Batch of target token-averaged energy (B, Tmax, 1).
ilens (LongTensor): Batch of the lengths of each input (B,).
olens (LongTensor): Batch of the lengths of each target (B,).
Parameters
----------
after_outs : Tensor
Batch of outputs after postnets (B, Lmax, odim).
before_outs : Tensor
Batch of outputs before postnets (B, Lmax, odim).
d_outs : LongTensor
Batch of outputs of duration predictor (B, Tmax).
p_outs : Tensor
Batch of outputs of pitch predictor (B, Tmax, 1).
e_outs : Tensor
Batch of outputs of energy predictor (B, Tmax, 1).
ys : Tensor
Batch of target features (B, Lmax, odim).
ds : LongTensor
Batch of durations (B, Tmax).
ps : Tensor
Batch of target token-averaged pitch (B, Tmax, 1).
es : Tensor
Batch of target token-averaged energy (B, Tmax, 1).
ilens : LongTensor
Batch of the lengths of each input (B,).
olens : LongTensor
Batch of the lengths of each target (B,).
Returns:
Tensor: L1 loss value.
Tensor: Duration predictor loss value.
Tensor: Pitch predictor loss value.
Tensor: Energy predictor loss value.
Returns
----------
Tensor
L1 loss value.
Tensor
Duration predictor loss value.
Tensor
Pitch predictor loss value.
Tensor
Energy predictor loss value.
"""
# apply mask to remove padded part

View File

@ -15,7 +15,6 @@
import paddle
from paddle import nn
from parakeet.modules.layer_norm import LayerNorm
from parakeet.modules.masked_fill import masked_fill
@ -31,7 +30,8 @@ class DurationPredictor(nn.Layer):
.. _`FastSpeech: Fast, Robust and Controllable Text to Speech`:
https://arxiv.org/pdf/1905.09263.pdf
Note:
Note
----------
The calculation domain of outputs is different
between in `forward` and in `inference`. In `forward`,
the outputs are calculated in log domain but in `inference`,
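Aside: `inference` returns the durations in linear domain as non-negative integers (see the `inference` docstring below). A hedged sketch of that conversion, modelled on common FastSpeech implementations; the exact rounding/clipping recipe is an assumption, not necessarily this module's code:

import paddle

offset = 1.0
log_duration = paddle.to_tensor([0.0, 1.1, 2.3])        # what `forward` would predict
linear_duration = paddle.clip(
    paddle.round(log_duration.exp() - offset), min=0.0).cast("int64")
# roughly what `inference` returns: [0, 2, 9]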
@ -48,13 +48,20 @@ class DurationPredictor(nn.Layer):
offset=1.0):
"""Initilize duration predictor module.
Args:
idim (int): Input dimension.
n_layers (int, optional): Number of convolutional layers.
n_chans (int, optional): Number of channels of convolutional layers.
kernel_size (int, optional): Kernel size of convolutional layers.
dropout_rate (float, optional): Dropout rate.
offset (float, optional): Offset value to avoid nan in log domain.
Parameters
----------
idim : int
Input dimension.
n_layers : int, optional
Number of convolutional layers.
n_chans : int, optional
Number of channels of convolutional layers.
kernel_size : int, optional
Kernel size of convolutional layers.
dropout_rate : float, optional
Dropout rate.
offset : float, optional
Offset value to avoid nan in log domain.
"""
super(DurationPredictor, self).__init__()
@ -74,7 +81,7 @@ class DurationPredictor(nn.Layer):
LayerNorm(
n_chans, dim=1),
nn.Dropout(dropout_rate), ))
self.linear = nn.Linear(n_chans, 1)
self.linear = nn.Linear(n_chans, 1, bias_attr=True)
def _forward(self, xs, x_masks=None, is_inference=False):
# (B, idim, Tmax)
@ -83,7 +90,7 @@ class DurationPredictor(nn.Layer):
for f in self.conv:
xs = f(xs)
# NOTE: calculate in log domain
# (B, Tmax)
xs = self.linear(xs.transpose([0, 2, 1])).squeeze(-1)
@ -99,28 +106,34 @@ class DurationPredictor(nn.Layer):
def forward(self, xs, x_masks=None):
"""Calculate forward propagation.
Args:
xs (Tensor): Batch of input sequences (B, Tmax, idim).
x_masks (ByteTensor, optional):
Parameters
----------
xs : Tensor
Batch of input sequences (B, Tmax, idim).
x_masks : ByteTensor, optional
Batch of masks indicating padded part (B, Tmax).
Returns:
Tensor: Batch of predicted durations in log domain (B, Tmax).
Returns
----------
Tensor
Batch of predicted durations in log domain (B, Tmax).
"""
return self._forward(xs, x_masks, False)
def inference(self, xs, x_masks=None):
"""Inference duration.
Args:
xs (Tensor): Batch of input sequences (B, Tmax, idim).
x_masks (ByteTensor, optional):
Parameters
----------
xs : Tensor
Batch of input sequences (B, Tmax, idim).
x_masks : Tensor(bool), optional
Batch of masks indicating padded part (B, Tmax).
Returns:
LongTensor: Batch of predicted durations in linear domain int64 (B, Tmax).
Returns
----------
LongTensor
Batch of predicted durations in linear domain int64 (B, Tmax).
"""
return self._forward(xs, x_masks, True)
@ -135,10 +148,12 @@ class DurationPredictorLoss(nn.Layer):
def __init__(self, offset=1.0, reduction="mean"):
"""Initilize duration predictor loss module.
Args:
offset (float, optional): Offset value to avoid nan in log domain.
reduction (str): Reduction type in loss calculation.
Parameters
----------
offset : float, optional
Offset value to avoid nan in log domain.
reduction : str
Reduction type in loss calculation.
"""
super(DurationPredictorLoss, self).__init__()
self.criterion = nn.MSELoss(reduction=reduction)
@ -147,16 +162,21 @@ class DurationPredictorLoss(nn.Layer):
def forward(self, outputs, targets):
"""Calculate forward propagation.
Args:
outputs (Tensor): Batch of prediction durations in log domain (B, T)
targets (LongTensor): Batch of groundtruth durations in linear domain (B, T)
Parameters
----------
outputs : Tensor
Batch of prediction durations in log domain (B, T)
targets : LongTensor
Batch of groundtruth durations in linear domain (B, T)
Returns:
Tensor: Mean squared error loss value.
Returns
----------
Tensor
Mean squared error loss value.
Note:
Note
----------
`outputs` is in log domain but `targets` is in linear domain.
"""
# NOTE: outputs is in log domain while targets in linear
targets = paddle.log(targets.cast(dtype='float32') + self.offset)
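Aside: a minimal sketch of this loss, showing how the integer targets are moved into the same log domain as the predictions before the MSE (values are illustrative):

import paddle
from paddle import nn

offset = 1.0
outputs = paddle.to_tensor([0.7, 1.1])                  # predicted durations, log domain
targets = paddle.to_tensor([1, 2], dtype="int64")       # groundtruth durations, linear domain
targets = paddle.log(targets.cast("float32") + offset)  # -> [log(2), log(3)]
loss = nn.MSELoss(reduction="mean")(outputs, targets)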

View File

@ -13,8 +13,6 @@
# limitations under the License.
"""Length regulator related modules."""
import logging
import numpy as np
import paddle
from paddle import nn
@ -37,8 +35,10 @@ class LengthRegulator(nn.Layer):
def __init__(self, pad_value=0.0):
"""Initilize length regulator module.
Args:
pad_value (float, optional): Value used for padding.
Parameters
----------
pad_value : float, optional
Value used for padding.
"""
super().__init__()
@ -68,14 +68,19 @@ class LengthRegulator(nn.Layer):
def forward(self, xs, ds, alpha=1.0):
"""Calculate forward propagation.
Args:
xs (Tensor): Batch of sequences of char or phoneme embeddings (B, Tmax, D).
ds (LongTensor): Batch of durations of each frame (B, T).
alpha (float, optional): Alpha value to control speed of speech.
Returns:
Tensor: replicated input tensor based on durations (B, T*, D).
Parameters
----------
xs : Tensor
Batch of sequences of char or phoneme embeddings (B, Tmax, D).
ds : LongTensor
Batch of durations of each frame (B, T).
alpha : float, optional
Alpha value to control speed of speech.
Returns
----------
Tensor
Replicated input tensor based on durations (B, T*, D).
"""
if alpha != 1.0:
assert alpha > 0
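Aside: a toy illustration of what the length regulator produces; the repeat-by-duration call below is a hedged sketch of the idea, not this module's exact implementation:

import paddle

# one phoneme-embedding sequence of 3 tokens with embedding dim 2
xs = paddle.to_tensor([[[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]]])  # (B=1, Tmax=3, D=2)
ds = [2, 1, 3]                                                  # duration of each token in frames
repeated = paddle.concat(
    [xs[0, i:i + 1].tile([d, 1]) for i, d in enumerate(ds)], axis=0)
# repeated has shape (sum(ds), D) = (6, 2):
# [[1, 1], [1, 1], [2, 2], [3, 3], [3, 3], [3, 3]]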

View File

@ -43,15 +43,22 @@ class Postnet(nn.Layer):
use_batch_norm=True, ):
"""Initialize postnet module.
Args:
idim (int): Dimension of the inputs.
odim (int): Dimension of the outputs.
n_layers (int, optional): The number of layers.
n_filts (int, optional): The number of filter size.
n_units (int, optional): The number of filter channels.
use_batch_norm (bool, optional): Whether to use batch normalization..
dropout_rate (float, optional): Dropout rate..
Parameters
----------
idim : int
Dimension of the inputs.
odim : int
Dimension of the outputs.
n_layers : int, optional
The number of layers.
n_filts : int, optional
The number of filter size.
n_units : int, optional
The number of filter channels.
use_batch_norm : bool, optional
Whether to use batch normalization.
dropout_rate : float, optional
Dropout rate.
"""
super(Postnet, self).__init__()
self.postnet = nn.LayerList()
@ -111,11 +118,15 @@ class Postnet(nn.Layer):
def forward(self, xs):
"""Calculate forward propagation.
Args:
xs (Tensor): Batch of the sequences of padded input tensors (B, idim, Tmax).
Parameters
----------
xs : Tensor
Batch of the sequences of padded input tensors (B, idim, Tmax).
Returns:
Tensor: Batch of padded output tensor. (B, odim, Tmax).
Returns
----------
Tensor
Batch of padded output tensor. (B, odim, Tmax).
"""
for i in six.moves.range(len(self.postnet)):

View File

@ -15,10 +15,8 @@
import paddle
from paddle import nn
from parakeet.modules.layer_norm import LayerNorm
from parakeet.modules.masked_fill import masked_fill
from typeguard import check_argument_types
@ -43,13 +41,18 @@ class VariancePredictor(nn.Layer):
dropout_rate: float=0.5, ):
"""Initilize duration predictor module.
Args:
idim (int): Input dimension.
n_layers (int, optional): Number of convolutional layers.
n_chans (int, optional): Number of channels of convolutional layers.
kernel_size (int, optional): Kernel size of convolutional layers.
dropout_rate (float, optional): Dropout rate.
Parameters
----------
idim : int
Input dimension.
n_layers : int, optional
Number of convolutional layers.
n_chans : int, optional
Number of channels of convolutional layers.
kernel_size : int, optional
Kernel size of convolutional layers.
dropout_rate : float, optional
Dropout rate.
"""
assert check_argument_types()
super().__init__()
@ -70,26 +73,30 @@ class VariancePredictor(nn.Layer):
n_chans, dim=1),
nn.Dropout(dropout_rate), ))
self.linear = nn.Linear(n_chans, 1)
self.linear = nn.Linear(n_chans, 1, bias_attr=True)
def forward(self, xs: paddle.Tensor,
x_masks: paddle.Tensor=None) -> paddle.Tensor:
"""Calculate forward propagation.
Args:
xs (Tensor): Batch of input sequences (B, Tmax, idim).
x_masks (ByteTensor, optional):
Parameters
----------
xs : Tensor
Batch of input sequences (B, Tmax, idim).
x_masks : Tensor(bool), optional
Batch of masks indicating padded part (B, Tmax, 1).
Returns:
Tensor: Batch of predicted sequences (B, Tmax, 1).
Returns
----------
Tensor
Batch of predicted sequences (B, Tmax, 1).
"""
# (B, idim, Tmax)
xs = xs.transpose([0, 2, 1])
# (B, C, Tmax)
for f in self.conv:
xs = f(xs) # (B, C, Tmax)
# (B, C, Tmax)
xs = f(xs)
# (B, Tmax, 1)
xs = self.linear(xs.transpose([0, 2, 1]))

View File

@ -16,23 +16,22 @@
import math
import numpy
import paddle
from paddle import nn
from paddle.fluid.layers import sequence_mask
from parakeet.modules.masked_fill import masked_fill
class MultiHeadedAttention(nn.Layer):
"""Multi-Head Attention layer.
Args:
n_head (int): The number of heads.
n_feat (int): The number of features.
dropout_rate (float): Dropout rate.
Parameters
----------
n_head : int
The number of heads.
n_feat : int
The number of features.
dropout_rate : float
Dropout rate.
"""
def __init__(self, n_head, n_feat, dropout_rate):
@ -42,33 +41,42 @@ class MultiHeadedAttention(nn.Layer):
# We assume d_v always equals d_k
self.d_k = n_feat // n_head
self.h = n_head
self.linear_q = nn.Linear(n_feat, n_feat)
self.linear_k = nn.Linear(n_feat, n_feat)
self.linear_v = nn.Linear(n_feat, n_feat)
self.linear_out = nn.Linear(n_feat, n_feat)
self.linear_q = nn.Linear(n_feat, n_feat, bias_attr=True)
self.linear_k = nn.Linear(n_feat, n_feat, bias_attr=True)
self.linear_v = nn.Linear(n_feat, n_feat, bias_attr=True)
self.linear_out = nn.Linear(n_feat, n_feat, bias_attr=True)
self.attn = None
self.dropout = nn.Dropout(p=dropout_rate)
def forward_qkv(self, query, key, value):
"""Transform query, key and value.
Args:
query (paddle.Tensor): Query tensor (#batch, time1, size).
key (paddle.Tensor): Key tensor (#batch, time2, size).
value (paddle.Tensor): Value tensor (#batch, time2, size).
Returns:
paddle.Tensor: Transformed query tensor (#batch, n_head, time1, d_k).
paddle.Tensor: Transformed key tensor (#batch, n_head, time2, d_k).
paddle.Tensor: Transformed value tensor (#batch, n_head, time2, d_k).
Parameters
----------
query : paddle.Tensor
Query tensor (#batch, time1, size).
key : paddle.Tensor
Key tensor (#batch, time2, size).
value : paddle.Tensor
Value tensor (#batch, time2, size).
Returns
----------
paddle.Tensor
Transformed query tensor (#batch, n_head, time1, d_k).
paddle.Tensor
Transformed key tensor (#batch, n_head, time2, d_k).
paddle.Tensor
Transformed value tensor (#batch, n_head, time2, d_k).
"""
n_batch = query.shape[0]
q = paddle.reshape(
self.linear_q(query), [n_batch, -1, self.h, self.d_k])
k = paddle.reshape(self.linear_k(key), [n_batch, -1, self.h, self.d_k])
v = paddle.reshape(
self.linear_v(value), [n_batch, -1, self.h, self.d_k])
# (batch, head, time1, d_k)
q = q.transpose((0, 2, 1, 3))
# (batch, head, time2, d_k)
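Aside: a shape-only sketch of the reshapes above (n_feat=8 and n_head=2 are illustrative values):

import paddle
from paddle import nn

n_batch, time1, n_feat, n_head = 3, 5, 8, 2
d_k = n_feat // n_head
query = paddle.randn([n_batch, time1, n_feat])
linear_q = nn.Linear(n_feat, n_feat)
q = paddle.reshape(linear_q(query), [n_batch, -1, n_head, d_k])  # (3, 5, 2, 4)
q = q.transpose((0, 2, 1, 3))  # (3, 2, 5, 4) = (batch, head, time1, d_k)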
@ -80,44 +88,40 @@ class MultiHeadedAttention(nn.Layer):
def forward_attention(self, value, scores, mask=None):
"""Compute attention context vector.
Args:
value (paddle.Tensor): Transformed value (#batch, n_head, time2, d_k).
scores (paddle.Tensor): Attention score (#batch, n_head, time1, time2).
mask (paddle.Tensor): Mask (#batch, 1, time2) or (#batch, time1, time2).
Parameters
----------
value : paddle.Tensor
Transformed value (#batch, n_head, time2, d_k).
scores : paddle.Tensor
Attention score (#batch, n_head, time1, time2).
mask : paddle.Tensor
Mask (#batch, 1, time2) or (#batch, time1, time2).
Returns:
paddle.Tensor: Transformed value (#batch, time1, d_model)
Returns
----------
paddle.Tensor
Transformed value (#batch, time1, d_model)
weighted by the attention score (#batch, time1, time2).
"""
n_batch = value.shape[0]
softmax = paddle.nn.Softmax(axis=-1)
if mask is not None:
mask = mask.unsqueeze(1)
# Invert the mask: padded positions become True, and those positions are later replaced with 0.
mask = paddle.logical_not(mask)
# mask = paddle.cast(mask, dtype='int64')
# positions where mask == 1 are replaced with min_value
# scores = scores.masked_fill(mask, min_value)
min_value = float(
numpy.finfo(
paddle.to_tensor(
0, dtype=scores.dtype).numpy().dtype).min)
scores = masked_fill(scores, mask, min_value)
self.attn = softmax(scores) # (batch, head, time1, time2)
# fill the elements where the mask is 1 with `value`, i.e. keep only the values where the mask is 0
# self.attn = torch.softmax(scores, dim=-1).masked_fill(
# mask, 0.0
# ) # (batch, head, time1, time2)
# keep the positions where the mask is 0; everything else becomes 0
# (batch, head, time1, time2)
self.attn = softmax(scores)
self.attn = masked_fill(self.attn, mask, 0.0)
else:
self.attn = softmax(scores) # (batch, head, time1, time2)
# (batch, head, time1, time2)
self.attn = softmax(scores)
# (batch, head, time1, time2)
p_attn = self.dropout(self.attn)
# (batch, head, time1, time2) * (batch, head, time2, d_k) -> # (batch, head, time1, d_k)
x = paddle.matmul(p_attn, value)
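Aside: the masking logic above in isolation; padded positions get a very negative score before the softmax and are zeroed afterwards. This is a hedged sketch that uses paddle.where in place of the repo's masked_fill helper:

import numpy
import paddle

scores = paddle.to_tensor([[1.0, 2.0, 3.0]])       # (1, time2)
mask = paddle.to_tensor([[True, True, False]])     # True = real token, False = padding
inv_mask = paddle.logical_not(mask)                # True marks padded positions
min_value = float(numpy.finfo("float32").min)
scores = paddle.where(inv_mask, paddle.full_like(scores, min_value), scores)
attn = paddle.nn.Softmax(axis=-1)(scores)
attn = paddle.where(inv_mask, paddle.zeros_like(attn), attn)
# attn -> roughly [[0.269, 0.731, 0.000]]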
@ -130,16 +134,21 @@ class MultiHeadedAttention(nn.Layer):
def forward(self, query, key, value, mask=None):
"""Compute scaled dot product attention.
Args:
query (paddle.Tensor): Query tensor (#batch, time1, size).
key (paddle.Tensor): Key tensor (#batch, time2, size).
value (paddle.Tensor): Value tensor (#batch, time2, size).
mask (paddle.Tensor): Mask tensor (#batch, 1, time2) or
(#batch, time1, time2).
Returns:
paddle.Tensor: Output tensor (#batch, time1, d_model).
Parameters
----------
query : paddle.Tensor
Query tensor (#batch, time1, size).
key : paddle.Tensor
Key tensor (#batch, time2, size).
value : paddle.Tensor
Value tensor (#batch, time2, size).
mask : paddle.Tensor
Mask tensor (#batch, 1, time2) or (#batch, time1, time2).
Returns
----------
paddle.Tensor
Output tensor (#batch, time1, d_model).
"""
q, k, v = self.forward_qkv(query, key, value)
scores = paddle.matmul(q, k.transpose(

View File

@ -22,14 +22,16 @@ from paddle import nn
class PositionalEncoding(nn.Layer):
"""Positional encoding.
Args:
d_model (int): Embedding dimension.
dropout_rate (float): Dropout rate.
max_len (int): Maximum input length.
reverse (bool): Whether to reverse the input position. Only for
the class LegacyRelPositionalEncoding. We remove it in the current
class RelPositionalEncoding.
Parameters
----------
d_model : int
Embedding dimension.
dropout_rate : float
Dropout rate.
max_len : int
Maximum input length.
reverse : bool
Whether to reverse the input position. Only for
the class LegacyRelPositionalEncoding. We remove it in the current
class RelPositionalEncoding.
"""
def __init__(self, d_model, dropout_rate, max_len=5000, reverse=False):
@ -47,7 +49,6 @@ class PositionalEncoding(nn.Layer):
pe = paddle.zeros([x.shape[1], self.d_model])
if self.reverse:
# (x.shape[1],1)
position = paddle.arange(
x.shape[1] - 1, -1, -1.0, dtype=paddle.float32).unsqueeze(1)
else:
@ -65,12 +66,15 @@ class PositionalEncoding(nn.Layer):
def forward(self, x: paddle.Tensor):
"""Add positional encoding.
Args:
x (paddle.Tensor): Input tensor (batch, time, `*`).
Returns:
paddle.Tensor: Encoded tensor (batch, time, `*`).
Parameters
----------
x : paddle.Tensor
Input tensor (batch, time, `*`).
Returns
----------
paddle.Tensor
Encoded tensor (batch, time, `*`).
"""
self.extend_pe(x)
x = x * self.xscale + self.pe[:, :x.shape[1]]
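Aside: a compact sketch of the sinusoidal table that extend_pe builds and how forward applies it. This follows the standard Transformer formula; buffer handling and the exact xscale in the class may differ:

import math
import paddle

d_model, T = 8, 4
position = paddle.arange(0, T, dtype="float32").unsqueeze(1)   # (T, 1)
div_term = paddle.exp(
    paddle.arange(0, d_model, 2, dtype="float32")
    * -(math.log(10000.0) / d_model))
pe = paddle.zeros([T, d_model])
pe[:, 0::2] = paddle.sin(position * div_term)
pe[:, 1::2] = paddle.cos(position * div_term)
x = paddle.randn([1, T, d_model])
out = x * math.sqrt(d_model) + pe.unsqueeze(0)   # xscale = sqrt(d_model) is assumed here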
@ -82,11 +86,14 @@ class ScaledPositionalEncoding(PositionalEncoding):
See Sec. 3.2 https://arxiv.org/abs/1809.08895
Args:
d_model (int): Embedding dimension.
dropout_rate (float): Dropout rate.
max_len (int): Maximum input length.
Parameters
----------
d_model : int
Embedding dimension.
dropout_rate : float
Dropout rate.
max_len : int
Maximum input length.
"""
def __init__(self, d_model, dropout_rate, max_len=5000):
@ -106,12 +113,15 @@ class ScaledPositionalEncoding(PositionalEncoding):
def forward(self, x):
"""Add positional encoding.
Args:
x (paddle.Tensor): Input tensor (batch, time, `*`).
Returns:
paddle.Tensor: Encoded tensor (batch, time, `*`).
Parameters
----------
x : paddle.Tensor
Input tensor (batch, time, `*`).
Returns
----------
paddle.Tensor
Encoded tensor (batch, time, `*`).
"""
self.extend_pe(x)
x = x + self.alpha * self.pe[:, :x.shape[1]]

View File

@ -12,19 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import numpy
import logging
import paddle
from paddle import nn
from paddle.nn import functional as F
from paddle.nn import initializer as I
from paddle.fluid.layers import sequence_mask
import sys
from parakeet.modules.fastspeech2_transformer.embedding import PositionalEncoding
from parakeet.modules.fastspeech2_transformer.attention import MultiHeadedAttention
from parakeet.modules.fastspeech2_transformer.attention import MultiHeadedAttention
from parakeet.modules.fastspeech2_transformer.multi_layer_conv import Conv1dLinear
from parakeet.modules.fastspeech2_transformer.multi_layer_conv import MultiLayeredConv1d
from parakeet.modules.fastspeech2_transformer.positionwise_feed_forward import PositionwiseFeedForward
@ -35,28 +27,44 @@ from parakeet.modules.fastspeech2_transformer.repeat import repeat
class Encoder(nn.Layer):
"""Transformer encoder module.
Args:
idim (int): Input dimension.
attention_dim (int): Dimention of attention.
attention_heads (int): The number of heads of multi head attention.
linear_units (int): The number of units of position-wise feed forward.
num_blocks (int): The number of decoder blocks.
dropout_rate (float): Dropout rate.
positional_dropout_rate (float): Dropout rate after adding positional encoding.
attention_dropout_rate (float): Dropout rate in attention.
input_layer (Union[str, paddle.nn.Layer]): Input layer type.
pos_enc_class (paddle.nn.Layer): Positional encoding module class.
Parameters
----------
idim : int
Input dimension.
attention_dim : int
Dimension of attention.
attention_heads : int
The number of heads of multi head attention.
linear_units : int
The number of units of position-wise feed forward.
num_blocks : int
The number of decoder blocks.
dropout_rate : float
Dropout rate.
positional_dropout_rate : float
Dropout rate after adding positional encoding.
attention_dropout_rate : float
Dropout rate in attention.
input_layer : Union[str, paddle.nn.Layer]
Input layer type.
pos_enc_class : paddle.nn.Layer
Positional encoding module class.
`PositionalEncoding` or `ScaledPositionalEncoding`
normalize_before (bool): Whether to use layer_norm before the first block.
concat_after (bool): Whether to concat attention layer's input and output.
normalize_before : bool
Whether to use layer_norm before the first block.
concat_after : bool
Whether to concat attention layer's input and output.
if True, additional linear will be applied.
i.e. x -> x + linear(concat(x, att(x)))
if False, no additional linear will be applied. i.e. x -> x + att(x)
positionwise_layer_type (str): "linear", "conv1d", or "conv1d-linear".
positionwise_conv_kernel_size (int): Kernel size of positionwise conv1d layer.
selfattention_layer_type (str): Encoder attention layer type.
padding_idx (int): Padding idx for input_layer=embed.
positionwise_layer_type : str
"linear", "conv1d", or "conv1d-linear".
positionwise_conv_kernel_size : int
Kernel size of positionwise conv1d layer.
selfattention_layer_type : str
Encoder attention layer type.
padding_idx : int
Padding idx for input_layer=embed.
"""
def __init__(
@ -82,7 +90,8 @@ class Encoder(nn.Layer):
self.conv_subsampling_factor = 1
if input_layer == "linear":
self.embed = nn.Sequential(
nn.Linear(idim, attention_dim),
nn.Linear(
idim, attention_dim, bias_attr=True),
nn.LayerNorm(attention_dim),
nn.Dropout(dropout_rate),
nn.ReLU(),
@ -169,14 +178,19 @@ class Encoder(nn.Layer):
def forward(self, xs, masks):
"""Encode input sequence.
Args:
xs (paddle.Tensor): Input tensor (#batch, time, idim).
masks (paddle.Tensor): Mask tensor (#batch, time).
Returns:
paddle.Tensor: Output tensor (#batch, time, attention_dim).
paddle.Tensor: Mask tensor (#batch, time).
Parameters
----------
xs : paddle.Tensor
Input tensor (#batch, time, idim).
masks : paddle.Tensor
Mask tensor (#batch, time).
Returns
----------
paddle.Tensor
Output tensor (#batch, time, attention_dim).
paddle.Tensor
Mask tensor (#batch, time).
"""
xs = self.embed(xs)
xs, masks = self.encoders(xs, masks)
@ -187,16 +201,23 @@ class Encoder(nn.Layer):
def forward_one_step(self, xs, masks, cache=None):
"""Encode input frame.
Args:
xs (paddle.Tensor): Input tensor.
masks (paddle.Tensor): Mask tensor.
cache (List[paddle.Tensor]): List of cache tensors.
Returns:
paddle.Tensor: Output tensor.
paddle.Tensor: Mask tensor.
List[paddle.Tensor]: List of new cache tensors.
Parameters
----------
xs : paddle.Tensor
Input tensor.
masks : paddle.Tensor
Mask tensor.
cache : List[paddle.Tensor]
List of cache tensors.
Returns
----------
paddle.Tensor
Output tensor.
paddle.Tensor
Mask tensor.
List[paddle.Tensor]
List of new cache tensors.
"""
xs = self.embed(xs)

View File

@ -14,28 +14,31 @@
"""Encoder self-attention layer definition."""
import paddle
from paddle import nn
class EncoderLayer(nn.Layer):
"""Encoder layer module.
Args:
size (int): Input dimension.
self_attn (paddle.nn.Layer): Self-attention module instance.
`MultiHeadedAttention` or `RelPositionMultiHeadedAttention` instance
can be used as the argument.
feed_forward (paddle.nn.Layer): Feed-forward module instance.
`PositionwiseFeedForward`, `MultiLayeredConv1d`, or `Conv1dLinear` instance
can be used as the argument.
dropout_rate (float): Dropout rate.
normalize_before (bool): Whether to use layer_norm before the first block.
concat_after (bool): Whether to concat attention layer's input and output.
Parameters
----------
size : int
Input dimension.
self_attn : paddle.nn.Layer
Self-attention module instance.
`MultiHeadedAttention` instance can be used as the argument.
feed_forward : paddle.nn.Layer
Feed-forward module instance.
`PositionwiseFeedForward`, `MultiLayeredConv1d`, or `Conv1dLinear` instance can be used as the argument.
dropout_rate : float
Dropout rate.
normalize_before : bool
Whether to use layer_norm before the first block.
concat_after : bool
Whether to concat attention layer's input and output.
if True, additional linear will be applied.
i.e. x -> x + linear(concat(x, att(x)))
if False, no additional linear will be applied. i.e. x -> x + att(x)
"""
def __init__(
@ -57,20 +60,26 @@ class EncoderLayer(nn.Layer):
self.normalize_before = normalize_before
self.concat_after = concat_after
if self.concat_after:
self.concat_linear = nn.Linear(size + size, size)
self.concat_linear = nn.Linear(size + size, size, bias_attr=True)
def forward(self, x, mask, cache=None):
"""Compute encoded features.
Args:
x_input (paddle.Tensor): Input tensor (#batch, time, size).
mask (paddle.Tensor): Mask tensor for the input (#batch, time).
cache (paddle.Tensor): Cache tensor of the input (#batch, time - 1, size).
Returns:
paddle.Tensor: Output tensor (#batch, time, size).
paddle.Tensor: Mask tensor (#batch, time).
Parameters
----------
x : paddle.Tensor
Input tensor (#batch, time, size).
mask : paddle.Tensor
Mask tensor for the input (#batch, time).
cache : paddle.Tensor
Cache tensor of the input (#batch, time - 1, size).
Returns
----------
paddle.Tensor
Output tensor (#batch, time, size).
paddle.Tensor
Mask tensor (#batch, time).
"""
residual = x
if self.normalize_before:
@ -82,7 +91,6 @@ class EncoderLayer(nn.Layer):
assert cache.shape == (x.shape[0], x.shape[1] - 1, self.size)
x_q = x[:, -1:, :]
residual = residual[:, -1:, :]
# the non-pad mask becomes a pad mask
mask = None if mask is None else mask[:, -1:, :]
if self.concat_after:
@ -90,6 +98,7 @@ class EncoderLayer(nn.Layer):
(x, self.self_attn(x_q, x, x, mask)), axis=-1)
x = residual + self.concat_linear(x_concat)
else:
x = residual + self.dropout(self.self_attn(x_q, x, x, mask))
if not self.normalize_before:
x = self.norm1(x)

View File

@ -32,11 +32,16 @@ class MultiLayeredConv1d(paddle.nn.Layer):
def __init__(self, in_chans, hidden_chans, kernel_size, dropout_rate):
"""Initialize MultiLayeredConv1d module.
Args:
in_chans (int): Number of input channels.
hidden_chans (int): Number of hidden channels.
kernel_size (int): Kernel size of conv1d.
dropout_rate (float): Dropout rate.
Parameters
----------
in_chans : int
Number of input channels.
hidden_chans : int
Number of hidden channels.
kernel_size : int
Kernel size of conv1d.
dropout_rate : float
Dropout rate.
"""
super(MultiLayeredConv1d, self).__init__()
@ -58,14 +63,16 @@ class MultiLayeredConv1d(paddle.nn.Layer):
def forward(self, x):
"""Calculate forward propagation.
Args:
x (paddle.Tensor): Batch of input tensors (B, T, in_chans).
Returns:
paddle.Tensor: Batch of output tensors (B, T, in_chans).
Parameters
----------
x : paddle.Tensor
Batch of input tensors (B, T, in_chans).
Returns
----------
paddle.Tensor
Batch of output tensors (B, T, in_chans).
"""
# x = paddle.nn.ReLU(self.w_1(x.transpose(-1, 1))).transpose(-1, 1)
x = self.relu(self.w_1(x.transpose([0, 2, 1]))).transpose([0, 2, 1])
return self.w_2(self.dropout(x).transpose([0, 2, 1])).transpose(
[0, 2, 1])
@ -81,12 +88,16 @@ class Conv1dLinear(paddle.nn.Layer):
def __init__(self, in_chans, hidden_chans, kernel_size, dropout_rate):
"""Initialize Conv1dLinear module.
Args:
in_chans (int): Number of input channels.
hidden_chans (int): Number of hidden channels.
kernel_size (int): Kernel size of conv1d.
dropout_rate (float): Dropout rate.
Parameters
----------
in_chans : int
Number of input channels.
hidden_chans : int
Number of hidden channels.
kernel_size : int
Kernel size of conv1d.
dropout_rate : float
Dropout rate.
"""
super(Conv1dLinear, self).__init__()
self.w_1 = paddle.nn.Conv1D(
@ -95,18 +106,22 @@ class Conv1dLinear(paddle.nn.Layer):
kernel_size,
stride=1,
padding=(kernel_size - 1) // 2, )
self.w_2 = paddle.nn.Linear(hidden_chans, in_chans)
self.w_2 = paddle.nn.Linear(hidden_chans, in_chans, bias_attr=True)
self.dropout = paddle.nn.Dropout(dropout_rate)
self.relu = paddle.nn.ReLU()
def forward(self, x):
"""Calculate forward propagation.
Args:
x (paddle.Tensor): Batch of input tensors (B, T, in_chans).
Parameters
----------
x : paddle.Tensor
Batch of input tensors (B, T, in_chans).
Returns:
paddle.Tensor: Batch of output tensors (B, T, in_chans).
Returns
----------
paddle.Tensor
Batch of output tensors (B, T, in_chans).
"""
x = self.relu(self.w_1(x.transpose([0, 2, 1]))).transpose([0, 2, 1])

View File

@ -19,11 +19,14 @@ import paddle
class PositionwiseFeedForward(paddle.nn.Layer):
"""Positionwise feed forward layer.
Args:
idim (int): Input dimenstion.
hidden_units (int): The number of hidden units.
dropout_rate (float): Dropout rate.
Parameters
----------
idim : int
Input dimension.
hidden_units : int
The number of hidden units.
dropout_rate : float
Dropout rate.
"""
def __init__(self,
@ -33,8 +36,8 @@ class PositionwiseFeedForward(paddle.nn.Layer):
activation=paddle.nn.ReLU()):
"""Construct an PositionwiseFeedForward object."""
super(PositionwiseFeedForward, self).__init__()
self.w_1 = paddle.nn.Linear(idim, hidden_units)
self.w_2 = paddle.nn.Linear(hidden_units, idim)
self.w_1 = paddle.nn.Linear(idim, hidden_units, bias_attr=True)
self.w_2 = paddle.nn.Linear(hidden_units, idim, bias_attr=True)
self.dropout = paddle.nn.Dropout(dropout_rate)
self.activation = activation

View File

@ -29,12 +29,16 @@ class MultiSequential(paddle.nn.Sequential):
def repeat(N, fn):
"""Repeat module N times.
Args:
N (int): Number of repeat time.
fn (Callable): Function to generate module.
Returns:
MultiSequential: Repeated model instance.
Parameters
----------
N : int
Number of times to repeat.
fn : Callable
Function to generate module.
Returns
----------
MultiSequential
Repeated model instance.
"""
return MultiSequential(* [fn(n) for n in range(N)])
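Aside: a small usage sketch for repeat; the Linear layer is just an arbitrary module for illustration:

from paddle import nn
from parakeet.modules.fastspeech2_transformer.repeat import repeat

# fn is called with n = 0, 1, 2; each call builds an independent Linear(4, 4)
stack = repeat(3, lambda n: nn.Linear(4, 4))
print(stack)  # a MultiSequential holding the three Linear sublayers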

View File

@ -19,10 +19,12 @@ import paddle
class LayerNorm(paddle.nn.LayerNorm):
"""Layer normalization module.
Args:
nout (int): Output dim size.
dim (int): Dimension to be normalized.
Parameters
----------
nout : int
Output dim size.
dim : int
Dimension to be normalized.
"""
def __init__(self, nout, dim=-1):
@ -33,12 +35,15 @@ class LayerNorm(paddle.nn.LayerNorm):
def forward(self, x):
"""Apply layer normalization.
Args:
x (torch.Tensor): Input tensor.
Returns:
torch.Tensor: Normalized tensor.
Parameters
----------
x : paddle.Tensor
Input tensor.
Returns
----------
paddle.Tensor
Normalized tensor.
"""
if self.dim == -1:
return super(LayerNorm, self).forward(x)

View File

@ -28,7 +28,7 @@ def is_broadcastable(shp1, shp2):
def masked_fill(xs: paddle.Tensor,
mask: paddle.Tensor,
value: Union[float, int]):
# assert is_broadcastable(xs.shape, mask.shape) is True
assert is_broadcastable(xs.shape, mask.shape) is True
bshape = paddle.broadcast_shape(xs.shape, mask.shape)
mask = mask.broadcast_to(bshape)
trues = paddle.ones_like(xs) * value
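Aside: a short usage sketch for masked_fill; the helper broadcasts the mask and writes `value` wherever the mask is True:

import paddle
from parakeet.modules.masked_fill import masked_fill

xs = paddle.to_tensor([[1.0, 2.0, 3.0]])
mask = paddle.to_tensor([[False, False, True]])
out = masked_fill(xs, mask, 0.0)
# out -> [[1.0, 2.0, 0.0]]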

View File

@ -13,20 +13,27 @@
# limitations under the License.
import paddle
from paddle import nn
from typeguard import check_argument_types
# pad each sequence with zeros up to the length of the longest one in this batch
def pad_list(xs, pad_value):
"""Perform padding for the list of tensors.
Args:
xs (List): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)].
pad_value (float): Value for padding.
Parameters
----------
xs : List[Tensor]
List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)].
pad_value : float
Value for padding.
Returns:
Tensor: Padded tensor (B, Tmax, `*`).
Returns
----------
Tensor
Padded tensor (B, Tmax, `*`).
Examples:
Examples
----------
>>> x = [paddle.ones([4]), paddle.ones([2]), paddle.ones([1])]
>>> x
[tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])]
@ -34,11 +41,9 @@ def pad_list(xs, pad_value):
tensor([[1., 1., 1., 1.],
[1., 1., 0., 0.],
[1., 0., 0., 0.]])
"""
n_batch = len(xs)
max_len = max(x.shape[0] for x in xs)
# pad = xs[0].new(n_batch, max_len, *xs[0].shape[1:]).fill_(pad_value)
pad = paddle.full([n_batch, max_len, *xs[0].shape[1:]], pad_value)
for i in range(n_batch):
@ -50,13 +55,18 @@ def pad_list(xs, pad_value):
def make_pad_mask(lengths, length_dim=-1):
"""Make mask tensor containing indices of padded part.
Args:
lengths (LongTensor or List): Batch of lengths (B,).
Parameters
----------
lengths : LongTensor or List
Batch of lengths (B,).
Returns:
Tensor: Mask tensor containing indices of padded part bool.
Returns
----------
Tensor(bool)
Mask tensor containing indices of padded part.
Examples:
Examples
----------
With only lengths.
>>> lengths = [5, 3, 2]
@ -64,7 +74,6 @@ def make_pad_mask(lengths, length_dim=-1):
masks = [[0, 0, 0, 0 ,0],
[0, 0, 0, 1, 1],
[0, 0, 1, 1, 1]]
"""
if length_dim == 0:
raise ValueError("length_dim cannot be 0: {}".format(length_dim))
@ -88,17 +97,24 @@ def make_pad_mask(lengths, length_dim=-1):
def make_non_pad_mask(lengths, length_dim=-1):
"""Make mask tensor containing indices of non-padded part.
Args:
lengths (LongTensor or List): Batch of lengths (B,).
xs (Tensor, optional): The reference tensor.
Parameters
----------
lengths : LongTensor or List
Batch of lengths (B,).
xs : Tensor, optional
The reference tensor.
If set, masks will be the same shape as this tensor.
length_dim (int, optional): Dimension indicator of the above tensor.
length_dim : int, optional
Dimension indicator of the above tensor.
See the example.
Returns:
ByteTensor: mask tensor containing indices of padded part bool.
Returns
----------
Tensor(bool)
Mask tensor containing indices of non-padded part.
Examples:
Examples
----------
With only lengths.
>>> lengths = [5, 3, 2]
@ -106,6 +122,37 @@ def make_non_pad_mask(lengths, length_dim=-1):
masks = [[1, 1, 1, 1 ,1],
[1, 1, 1, 0, 0],
[1, 1, 0, 0, 0]]
"""
return paddle.logical_not(make_pad_mask(lengths, length_dim))
def initialize(model: nn.Layer, init: str):
"""Initialize weights of a neural network module.
Parameters are initialized using the given method or distribution.
Custom initialization routines can be implemented into submodules
Parameters
----------
model : paddle.nn.Layer
Target.
init : str
Method of initialization.
"""
assert check_argument_types()
if init == "xavier_uniform":
nn.initializer.set_global_initializer(nn.initializer.XavierUniform(),
nn.initializer.Constant())
elif init == "xavier_normal":
nn.initializer.set_global_initializer(nn.initializer.XavierNormal(),
nn.initializer.Constant())
elif init == "kaiming_uniform":
nn.initializer.set_global_initializer(nn.initializer.KaimingUniform(),
nn.initializer.Constant())
elif init == "kaiming_normal":
nn.initializer.set_global_initializer(nn.initializer.KaimingNormal(),
nn.initializer.Constant())
else:
raise ValueError("Unknown initialization: " + init)
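Aside: a brief usage sketch; as the code above shows, initialize sets paddle's global weight/bias initializers for one of the four supported names and raises ValueError for anything else:

from paddle import nn
from parakeet.modules.nets_utils import initialize

model = nn.Linear(4, 4)
initialize(model, "xavier_uniform")   # sets the global initializer pair
# initialize(model, "foo")            # would raise ValueError: Unknown initialization: foo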